47 files changed, 814 insertions, 301 deletions
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h index eb897f00e3..d1d5ad9715 100644 --- a/compiler/dex/bb_optimizations.h +++ b/compiler/dex/bb_optimizations.h @@ -71,26 +71,28 @@ class CacheMethodLoweringInfo : public PassME { }; /** - * @class CallInlining - * @brief Perform method inlining pass. + * @class SpecialMethodInliner + * @brief Performs method inlining pass on special kinds of methods. + * @details Special methods are methods that fall in one of the following categories: + * empty, instance getter, instance setter, argument return, and constant return. */ -class CallInlining : public PassME { +class SpecialMethodInliner : public PassME { public: - CallInlining() : PassME("CallInlining") { + SpecialMethodInliner() : PassME("SpecialMethodInliner") { } bool Gate(const PassDataHolder* data) const { DCHECK(data != nullptr); CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; DCHECK(cUnit != nullptr); - return cUnit->mir_graph->InlineCallsGate(); + return cUnit->mir_graph->InlineSpecialMethodsGate(); } void Start(PassDataHolder* data) const { DCHECK(data != nullptr); CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(cUnit != nullptr); - cUnit->mir_graph->InlineCallsStart(); + cUnit->mir_graph->InlineSpecialMethodsStart(); } bool Worker(const PassDataHolder* data) const { @@ -100,7 +102,7 @@ class CallInlining : public PassME { DCHECK(cUnit != nullptr); BasicBlock* bb = pass_me_data_holder->bb; DCHECK(bb != nullptr); - cUnit->mir_graph->InlineCalls(bb); + cUnit->mir_graph->InlineSpecialMethods(bb); // No need of repeating, so just return false. return false; } @@ -109,7 +111,7 @@ class CallInlining : public PassME { DCHECK(data != nullptr); CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(cUnit != nullptr); - cUnit->mir_graph->InlineCallsEnd(); + cUnit->mir_graph->InlineSpecialMethodsEnd(); } }; diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index b16cf14b02..711743d69b 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -97,14 +97,6 @@ static constexpr uint32_t kDisabledOptimizationsPerISA[] = { // 2 = kArm64. TODO(Arm64): enable optimizations once backend is mature enough. (1 << kLoadStoreElimination) | (1 << kLoadHoisting) | - (1 << kSuppressLoads) | - (1 << kClassInitCheckElimination) | - (1 << kTrackLiveTemps) | - (1 << kSafeOptimizations) | - (1 << kBBOpt) | - (1 << kMatch) | - (1 << kPromoteCompilerTemps) | - (1 << kSuppressExceptionEdges) | 0, // 3 = kThumb2. 0, @@ -582,7 +574,7 @@ static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, // Check if we support the byte code. if (std::find(unsupport_list, unsupport_list + unsupport_list_size, opcode) != unsupport_list + unsupport_list_size) { - if (!cu.mir_graph->IsPseudoMirOp(opcode)) { + if (!MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { VLOG(compiler) << "Unsupported dalvik byte code : " << mir->dalvikInsn.opcode; } else { diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc index e372206228..3de448344a 100644 --- a/compiler/dex/mir_analysis.cc +++ b/compiler/dex/mir_analysis.cc @@ -902,7 +902,7 @@ void MIRGraph::AnalyzeBlock(BasicBlock* bb, MethodStats* stats) { while (!done) { tbb->visited = true; for (MIR* mir = tbb->first_mir_insn; mir != NULL; mir = mir->next) { - if (IsPseudoMirOp(mir->dalvikInsn.opcode)) { + if (MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) { // Skip any MIR pseudo-op. 
continue; } diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index bc99a272a6..b82c5c7f00 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -909,6 +909,16 @@ void MIRGraph::HandleDef(ArenaBitVector* def_v, int dalvik_reg_id) { def_v->SetBit(dalvik_reg_id); } +void MIRGraph::HandleExtended(ArenaBitVector* use_v, ArenaBitVector* def_v, + ArenaBitVector* live_in_v, + const MIR::DecodedInstruction& d_insn) { + switch (static_cast<int>(d_insn.opcode)) { + default: + LOG(ERROR) << "Unexpected Extended Opcode " << d_insn.opcode; + break; + } +} + /* * Find out live-in variables for natural loops. Variables that are live-in in * the main loop body are considered to be defined in the entry block. @@ -966,6 +976,9 @@ bool MIRGraph::FindLocalLiveIn(BasicBlock* bb) { HandleDef(def_v, d_insn->vA+1); } } + if (df_attributes & DF_FORMAT_EXTENDED) { + HandleExtended(use_v, def_v, live_in_v, mir->dalvikInsn); + } } return true; } @@ -1048,6 +1061,14 @@ void MIRGraph::DataFlowSSAFormat3RC(MIR* mir) { } } +void MIRGraph::DataFlowSSAFormatExtended(MIR* mir) { + switch (static_cast<int>(mir->dalvikInsn.opcode)) { + default: + LOG(ERROR) << "Missing case for extended MIR: " << mir->dalvikInsn.opcode; + break; + } +} + /* Entry function to convert a block into SSA representation */ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { MIR* mir; @@ -1063,7 +1084,7 @@ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { uint64_t df_attributes = GetDataFlowAttributes(mir); // If not a pseudo-op, note non-leaf or can throw - if (!IsPseudoMirOp(mir->dalvikInsn.opcode)) { + if (!MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) { int flags = Instruction::FlagsOf(mir->dalvikInsn.opcode); if ((flags & Instruction::kInvoke) != 0 && (mir->optimization_flags & MIR_INLINED) == 0) { @@ -1083,6 +1104,11 @@ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { continue; } + if (df_attributes & DF_FORMAT_EXTENDED) { + DataFlowSSAFormatExtended(mir); + continue; + } + if (df_attributes & DF_HAS_USES) { if (df_attributes & DF_UA) { num_uses++; diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 4fbace26e7..1c8a9b5079 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -193,14 +193,16 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, bottom_block->successor_block_list_type = orig_block->successor_block_list_type; bottom_block->successor_blocks = orig_block->successor_blocks; orig_block->successor_block_list_type = kNotUsed; - orig_block->successor_blocks = NULL; + orig_block->successor_blocks = nullptr; GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bottom_block->successor_blocks); while (true) { SuccessorBlockInfo* successor_block_info = iterator.Next(); - if (successor_block_info == NULL) break; + if (successor_block_info == nullptr) break; BasicBlock* bb = GetBasicBlock(successor_block_info->block); - bb->predecessors->Delete(orig_block->id); - bb->predecessors->Insert(bottom_block->id); + if (bb != nullptr) { + bb->predecessors->Delete(orig_block->id); + bb->predecessors->Insert(bottom_block->id); + } } } @@ -222,7 +224,7 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, DCHECK(insn == bottom_block->first_mir_insn); DCHECK_EQ(insn->offset, bottom_block->start_offset); DCHECK(static_cast<int>(insn->dalvikInsn.opcode) == kMirOpCheck || - !IsPseudoMirOp(insn->dalvikInsn.opcode)); + !MIR::DecodedInstruction::IsPseudoMirOp(insn->dalvikInsn.opcode)); DCHECK_EQ(dex_pc_to_block_map_.Get(insn->offset), 
orig_block->id); MIR* p = insn; dex_pc_to_block_map_.Put(p->offset, bottom_block->id); @@ -237,7 +239,7 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, * CHECK and work portions. Since the 2nd half of a split operation is always * the first in a BasicBlock, we can't hit it here. */ - if ((opcode == kMirOpCheck) || !IsPseudoMirOp(opcode)) { + if ((opcode == kMirOpCheck) || !MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { DCHECK_EQ(dex_pc_to_block_map_.Get(p->offset), orig_block->id); dex_pc_to_block_map_.Put(p->offset, bottom_block->id); } @@ -861,11 +863,17 @@ uint64_t MIRGraph::GetDataFlowAttributes(MIR* mir) { /* Dump the CFG into a DOT graph */ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suffix) { FILE* file; + static AtomicInteger cnt(0); + + // Increment counter to get a unique file number. + cnt++; + std::string fname(PrettyMethod(cu_->method_idx, *cu_->dex_file)); ReplaceSpecialChars(fname); - fname = StringPrintf("%s%s%x%s.dot", dir_prefix, fname.c_str(), + fname = StringPrintf("%s%s%x%s_%d.dot", dir_prefix, fname.c_str(), GetBasicBlock(GetEntryBlock()->fall_through)->start_offset, - suffix == nullptr ? "" : suffix); + suffix == nullptr ? "" : suffix, + cnt.LoadRelaxed()); file = fopen(fname.c_str(), "w"); if (file == NULL) { return; @@ -882,6 +890,7 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff BasicBlock* bb = GetBasicBlock(block_idx); if (bb == NULL) continue; if (bb->block_type == kDead) continue; + if (bb->hidden) continue; if (bb->block_type == kEntryBlock) { fprintf(file, " entry_%d [shape=Mdiamond];\n", bb->id); } else if (bb->block_type == kExitBlock) { @@ -916,7 +925,8 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff } else { fprintf(file, " {%04x %s %s %s %s\\l}%s\\\n", mir->offset, mir->ssa_rep ? GetDalvikDisassembly(mir) : - !IsPseudoMirOp(opcode) ? Instruction::Name(mir->dalvikInsn.opcode) : + !MIR::DecodedInstruction::IsPseudoMirOp(opcode) ? + Instruction::Name(mir->dalvikInsn.opcode) : extended_mir_op_names_[opcode - kMirOpFirst], (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ", (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? " no_nullcheck" : " ", @@ -1222,7 +1232,7 @@ char* MIRGraph::GetDalvikDisassembly(const MIR* mir) { nop = true; } - if (IsPseudoMirOp(opcode)) { + if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { str.append(extended_mir_op_names_[opcode - kMirOpFirst]); } else { dalvik_format = Instruction::FormatOf(insn.opcode); @@ -1693,11 +1703,13 @@ BasicBlock* ChildBlockIterator::Next() { // We visited both taken and fallthrough. Now check if we have successors we need to visit. if (have_successors_ == true) { // Get information about next successor block. - SuccessorBlockInfo* successor_block_info = successor_iter_.Next(); - - // If we don't have anymore successors, return nullptr. - if (successor_block_info != nullptr) { - return mir_graph_->GetBasicBlock(successor_block_info->block); + for (SuccessorBlockInfo* successor_block_info = successor_iter_.Next(); + successor_block_info != nullptr; + successor_block_info = successor_iter_.Next()) { + // If block was replaced by zero block, take next one. 
+ if (successor_block_info->block != NullBasicBlockId) { + return mir_graph_->GetBasicBlock(successor_block_info->block); + } } } diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index d09732891c..1556a19da7 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -80,6 +80,7 @@ enum DataFlowAttributePos { kSetsConst, kFormat35c, kFormat3rc, + kFormatExtended, // Extended format for extended MIRs. kNullCheckSrc0, // Null check of uses[0]. kNullCheckSrc1, // Null check of uses[1]. kNullCheckSrc2, // Null check of uses[2]. @@ -118,6 +119,7 @@ enum DataFlowAttributePos { #define DF_SETS_CONST (UINT64_C(1) << kSetsConst) #define DF_FORMAT_35C (UINT64_C(1) << kFormat35c) #define DF_FORMAT_3RC (UINT64_C(1) << kFormat3rc) +#define DF_FORMAT_EXTENDED (UINT64_C(1) << kFormatExtended) #define DF_NULL_CHK_0 (UINT64_C(1) << kNullCheckSrc0) #define DF_NULL_CHK_1 (UINT64_C(1) << kNullCheckSrc1) #define DF_NULL_CHK_2 (UINT64_C(1) << kNullCheckSrc2) @@ -284,34 +286,46 @@ struct MIR { */ bool GetConstant(int64_t* ptr_value, bool* wide) const; + static bool IsPseudoMirOp(Instruction::Code opcode) { + return static_cast<int>(opcode) >= static_cast<int>(kMirOpFirst); + } + + static bool IsPseudoMirOp(int opcode) { + return opcode >= static_cast<int>(kMirOpFirst); + } + + bool IsInvoke() const { + return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kInvoke) == Instruction::kInvoke); + } + bool IsStore() const { - return ((Instruction::FlagsOf(opcode) & Instruction::kStore) == Instruction::kStore); + return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kStore) == Instruction::kStore); } bool IsLoad() const { - return ((Instruction::FlagsOf(opcode) & Instruction::kLoad) == Instruction::kLoad); + return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kLoad) == Instruction::kLoad); } bool IsConditionalBranch() const { - return (Instruction::FlagsOf(opcode) == (Instruction::kContinue | Instruction::kBranch)); + return !IsPseudoMirOp(opcode) && (Instruction::FlagsOf(opcode) == (Instruction::kContinue | Instruction::kBranch)); } /** * @brief Is the register C component of the decoded instruction a constant? */ bool IsCFieldOrConstant() const { - return ((Instruction::FlagsOf(opcode) & Instruction::kRegCFieldOrConstant) == Instruction::kRegCFieldOrConstant); + return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kRegCFieldOrConstant) == Instruction::kRegCFieldOrConstant); } /** * @brief Is the register C component of the decoded instruction a constant? */ bool IsBFieldOrConstant() const { - return ((Instruction::FlagsOf(opcode) & Instruction::kRegBFieldOrConstant) == Instruction::kRegBFieldOrConstant); + return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kRegBFieldOrConstant) == Instruction::kRegBFieldOrConstant); } bool IsCast() const { - return ((Instruction::FlagsOf(opcode) & Instruction::kCast) == Instruction::kCast); + return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kCast) == Instruction::kCast); } /** @@ -321,11 +335,11 @@ struct MIR { * when crossing such an instruction. 
*/ bool Clobbers() const { - return ((Instruction::FlagsOf(opcode) & Instruction::kClobber) == Instruction::kClobber); + return !IsPseudoMirOp(opcode) && ((Instruction::FlagsOf(opcode) & Instruction::kClobber) == Instruction::kClobber); } bool IsLinear() const { - return (Instruction::FlagsOf(opcode) & (Instruction::kAdd | Instruction::kSubtract)) != 0; + return !IsPseudoMirOp(opcode) && (Instruction::FlagsOf(opcode) & (Instruction::kAdd | Instruction::kSubtract)) != 0; } } dalvikInsn; @@ -877,14 +891,6 @@ class MIRGraph { return backward_branches_ + forward_branches_; } - static bool IsPseudoMirOp(Instruction::Code opcode) { - return static_cast<int>(opcode) >= static_cast<int>(kMirOpFirst); - } - - static bool IsPseudoMirOp(int opcode) { - return opcode >= static_cast<int>(kMirOpFirst); - } - // Is this vreg in the in set? bool IsInVReg(int vreg) { return (vreg >= cu_->num_regs); @@ -956,10 +962,10 @@ class MIRGraph { void ComputeTopologicalSortOrder(); BasicBlock* CreateNewBB(BBType block_type); - bool InlineCallsGate(); - void InlineCallsStart(); - void InlineCalls(BasicBlock* bb); - void InlineCallsEnd(); + bool InlineSpecialMethodsGate(); + void InlineSpecialMethodsStart(); + void InlineSpecialMethods(BasicBlock* bb); + void InlineSpecialMethodsEnd(); /** * @brief Perform the initial preparation for the Method Uses. @@ -1059,6 +1065,9 @@ class MIRGraph { void HandleLiveInUse(ArenaBitVector* use_v, ArenaBitVector* def_v, ArenaBitVector* live_in_v, int dalvik_reg_id); void HandleDef(ArenaBitVector* def_v, int dalvik_reg_id); + void HandleExtended(ArenaBitVector* use_v, ArenaBitVector* def_v, + ArenaBitVector* live_in_v, + const MIR::DecodedInstruction& d_insn); bool DoSSAConversion(BasicBlock* bb); bool InvokeUsesMethodStar(MIR* mir); int ParseInsn(const uint16_t* code_ptr, MIR::DecodedInstruction* decoded_instruction); @@ -1080,6 +1089,7 @@ class MIRGraph { void HandleSSAUse(int* uses, int dalvik_reg, int reg_index); void DataFlowSSAFormat35C(MIR* mir); void DataFlowSSAFormat3RC(MIR* mir); + void DataFlowSSAFormatExtended(MIR* mir); bool FindLocalLiveIn(BasicBlock* bb); bool VerifyPredInfo(BasicBlock* bb); BasicBlock* NeedsVisit(BasicBlock* bb); diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index dc1057f277..869c48f66c 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -137,7 +137,7 @@ MIR* MIRGraph::FindMoveResult(BasicBlock* bb, MIR* mir) { break; } // Keep going if pseudo op, otherwise terminate - if (IsPseudoMirOp(mir->dalvikInsn.opcode)) { + if (MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) { mir = AdvanceMIR(&tbb, mir); } else { mir = NULL; @@ -877,7 +877,7 @@ bool MIRGraph::EliminateNullChecksAndInferTypes(BasicBlock* bb) { struct BasicBlock* next_bb = GetBasicBlock(bb->fall_through); for (MIR* tmir = next_bb->first_mir_insn; tmir != NULL; tmir =tmir->next) { - if (IsPseudoMirOp(tmir->dalvikInsn.opcode)) { + if (MIR::DecodedInstruction::IsPseudoMirOp(tmir->dalvikInsn.opcode)) { continue; } // First non-pseudo should be MOVE_RESULT_OBJECT @@ -1220,7 +1220,7 @@ void MIRGraph::ComputeInlineIFieldLoweringInfo(uint16_t field_idx, MIR* invoke, iget_or_iput->meta.ifield_lowering_info = field_info_index; } -bool MIRGraph::InlineCallsGate() { +bool MIRGraph::InlineSpecialMethodsGate() { if ((cu_->disable_opt & (1 << kSuppressMethodInlining)) != 0 || method_lowering_infos_.Size() == 0u) { return false; @@ -1232,7 +1232,7 @@ bool MIRGraph::InlineCallsGate() { return true; } -void 
MIRGraph::InlineCallsStart() { +void MIRGraph::InlineSpecialMethodsStart() { // Prepare for inlining getters/setters. Since we're inlining at most 1 IGET/IPUT from // each INVOKE, we can index the data by the MIR::meta::method_lowering_info index. @@ -1246,12 +1246,12 @@ void MIRGraph::InlineCallsStart() { temp_bit_vector_size_ * sizeof(*temp_insn_data_), kArenaAllocGrowableArray)); } -void MIRGraph::InlineCalls(BasicBlock* bb) { +void MIRGraph::InlineSpecialMethods(BasicBlock* bb) { if (bb->block_type != kDalvikByteCode) { return; } for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { - if (IsPseudoMirOp(mir->dalvikInsn.opcode)) { + if (MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) { continue; } if (!(Instruction::FlagsOf(mir->dalvikInsn.opcode) & Instruction::kInvoke)) { @@ -1270,17 +1270,17 @@ void MIRGraph::InlineCalls(BasicBlock* bb) { MethodReference target = method_info.GetTargetMethod(); if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(target.dex_file) ->GenInline(this, bb, mir, target.dex_method_index)) { - if (cu_->verbose) { - LOG(INFO) << "In \"" << PrettyMethod(cu_->method_idx, *cu_->dex_file) - << "\" @0x" << std::hex << mir->offset - << " inlined " << method_info.GetInvokeType() << " (" << sharp_type << ") call to \"" - << PrettyMethod(target.dex_method_index, *target.dex_file) << "\""; + if (cu_->verbose || cu_->print_pass) { + LOG(INFO) << "SpecialMethodInliner: Inlined " << method_info.GetInvokeType() << " (" + << sharp_type << ") call to \"" << PrettyMethod(target.dex_method_index, *target.dex_file) + << "\" from \"" << PrettyMethod(cu_->method_idx, *cu_->dex_file) + << "\" @0x" << std::hex << mir->offset; } } } } -void MIRGraph::InlineCallsEnd() { +void MIRGraph::InlineSpecialMethodsEnd() { DCHECK(temp_insn_data_ != nullptr); temp_insn_data_ = nullptr; DCHECK(temp_bit_vector_ != nullptr); diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc index 4c9bed65dc..c72a4a667e 100644 --- a/compiler/dex/pass_driver_me_opts.cc +++ b/compiler/dex/pass_driver_me_opts.cc @@ -35,7 +35,7 @@ template<> const Pass* const PassDriver<PassDriverMEOpts>::g_passes[] = { GetPassInstance<CacheFieldLoweringInfo>(), GetPassInstance<CacheMethodLoweringInfo>(), - GetPassInstance<CallInlining>(), + GetPassInstance<SpecialMethodInliner>(), GetPassInstance<CodeLayout>(), GetPassInstance<NullCheckEliminationAndTypeInference>(), GetPassInstance<ClassInitCheckElimination>(), diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index e8f5cb9f09..3ee3e2e61d 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -91,17 +91,121 @@ void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { RegLocation rl_dest = mir_graph_->GetDest(mir); RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg; RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg; + rl_src = LoadValue(rl_src, src_reg_class); + // rl_src may be aliased with rl_result/rl_dest, so do compare early. 
+ OpRegImm(kOpCmp, rl_src.reg, 0); + ArmConditionCode code = ArmConditionEncoding(mir->meta.ccode); - RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]]; - RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]]; - rl_true = LoadValue(rl_true, result_reg_class); - rl_false = LoadValue(rl_false, result_reg_class); - rl_result = EvalLoc(rl_dest, result_reg_class, true); - OpRegImm(kOpCmp, rl_src.reg, 0); - NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rl_true.reg.GetReg(), - rl_false.reg.GetReg(), code); + // The kMirOpSelect has two variants, one for constants and one for moves. + bool is_wide = rl_dest.ref || rl_dest.wide; + + if (mir->ssa_rep->num_uses == 1) { + uint32_t true_val = mir->dalvikInsn.vB; + uint32_t false_val = mir->dalvikInsn.vC; + + int opcode; // The opcode. + int left_op, right_op; // The operands. + bool rl_result_evaled = false; + + // Check some simple cases. + // TODO: Improve this. + int zero_reg = (is_wide ? rs_xzr : rs_wzr).GetReg(); + + if ((true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0)) { + // CSInc cheap based on wzr. + if (true_val == 1) { + // Negate. + code = ArmConditionEncoding(NegateComparison(mir->meta.ccode)); + } + + left_op = right_op = zero_reg; + opcode = is_wide ? WIDE(kA64Csinc4rrrc) : kA64Csinc4rrrc; + } else if ((true_val == 0 && false_val == 0xFFFFFFFF) || + (true_val == 0xFFFFFFFF && false_val == 0)) { + // CSneg cheap based on wzr. + if (true_val == 0xFFFFFFFF) { + // Negate. + code = ArmConditionEncoding(NegateComparison(mir->meta.ccode)); + } + + left_op = right_op = zero_reg; + opcode = is_wide ? WIDE(kA64Csneg4rrrc) : kA64Csneg4rrrc; + } else if (true_val == 0 || false_val == 0) { + // Csel half cheap based on wzr. + rl_result = EvalLoc(rl_dest, result_reg_class, true); + rl_result_evaled = true; + if (false_val == 0) { + // Negate. + code = ArmConditionEncoding(NegateComparison(mir->meta.ccode)); + } + LoadConstantNoClobber(rl_result.reg, true_val == 0 ? false_val : true_val); + left_op = zero_reg; + right_op = rl_result.reg.GetReg(); + opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc; + } else if (true_val == 1 || false_val == 1) { + // CSInc half cheap based on wzr. + rl_result = EvalLoc(rl_dest, result_reg_class, true); + rl_result_evaled = true; + if (true_val == 1) { + // Negate. + code = ArmConditionEncoding(NegateComparison(mir->meta.ccode)); + } + LoadConstantNoClobber(rl_result.reg, true_val == 1 ? false_val : true_val); + left_op = rl_result.reg.GetReg(); + right_op = zero_reg; + opcode = is_wide ? WIDE(kA64Csinc4rrrc) : kA64Csinc4rrrc; + } else if (true_val == 0xFFFFFFFF || false_val == 0xFFFFFFFF) { + // CSneg half cheap based on wzr. + rl_result = EvalLoc(rl_dest, result_reg_class, true); + rl_result_evaled = true; + if (true_val == 0xFFFFFFFF) { + // Negate. + code = ArmConditionEncoding(NegateComparison(mir->meta.ccode)); + } + LoadConstantNoClobber(rl_result.reg, true_val == 0xFFFFFFFF ? false_val : true_val); + left_op = rl_result.reg.GetReg(); + right_op = zero_reg; + opcode = is_wide ? WIDE(kA64Csneg4rrrc) : kA64Csneg4rrrc; + } else { + // Csel. The rest. Use rl_result and a temp. + // TODO: To minimize the constants being loaded, check whether one can be inexpensively + // loaded as n - 1 or ~n. 
+ rl_result = EvalLoc(rl_dest, result_reg_class, true); + rl_result_evaled = true; + LoadConstantNoClobber(rl_result.reg, true_val); + RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class); + if (rl_dest.wide) { + if (t_reg2.Is32Bit()) { + t_reg2 = As64BitReg(t_reg2); + } + } + LoadConstantNoClobber(t_reg2, false_val); + + // Use csel. + left_op = rl_result.reg.GetReg(); + right_op = t_reg2.GetReg(); + opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc; + } + + if (!rl_result_evaled) { + rl_result = EvalLoc(rl_dest, result_reg_class, true); + } + + NewLIR4(opcode, rl_result.reg.GetReg(), left_op, right_op, code); + } else { + RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]]; + RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]]; + + rl_true = LoadValue(rl_true, result_reg_class); + rl_false = LoadValue(rl_false, result_reg_class); + rl_result = EvalLoc(rl_dest, result_reg_class, true); + + int opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc; + NewLIR4(opcode, rl_result.reg.GetReg(), + rl_true.reg.GetReg(), rl_false.reg.GetReg(), code); + } StoreValue(rl_dest, rl_result); } @@ -110,7 +214,6 @@ void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); LIR* taken = &block_label_list_[bb->taken]; LIR* not_taken = &block_label_list_[bb->fall_through]; - rl_src1 = LoadValueWide(rl_src1, kCoreReg); // Normalize such that if either operand is constant, src2 will be constant. ConditionCode ccode = mir->meta.ccode; if (rl_src1.is_const) { @@ -118,16 +221,22 @@ void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { ccode = FlipComparisonOrder(ccode); } + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + if (rl_src2.is_const) { - rl_src2 = UpdateLocWide(rl_src2); + // TODO: Optimize for rl_src1.is_const? (Does happen in the boot image at the moment.) + int64_t val = mir_graph_->ConstantValueWide(rl_src2); // Special handling using cbz & cbnz. if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) { OpCmpImmBranch(ccode, rl_src1.reg, 0, taken); OpCmpImmBranch(NegateComparison(ccode), rl_src1.reg, 0, not_taken); return; + } + // Only handle Imm if src2 is not already in a register. - } else if (rl_src2.location != kLocPhysReg) { + rl_src2 = UpdateLocWide(rl_src2); + if (rl_src2.location != kLocPhysReg) { OpRegImm64(kOpCmp, rl_src1.reg, val); OpCondBranch(ccode, taken); OpCondBranch(NegateComparison(ccode), not_taken); diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 5870d22208..048aca3735 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1046,9 +1046,19 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { } // Push a marker to take place of lr. vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker); - // fp regs already sorted. - for (uint32_t i = 0; i < fp_vmap_table_.size(); i++) { - vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment); + if (cu_->instruction_set == kThumb2) { + // fp regs already sorted. + for (uint32_t i = 0; i < fp_vmap_table_.size(); i++) { + vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment); + } + } else { + // For other platforms regs may have been inserted out of order - sort first. + std::sort(fp_vmap_table_.begin(), fp_vmap_table_.end()); + for (size_t i = 0 ; i < fp_vmap_table_.size(); ++i) { + // Copy, stripping out the phys register sort key. 
+ vmap_encoder.PushBackUnsigned( + ~(-1 << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment)); + } } } else { DCHECK_EQ(POPCOUNT(core_spill_mask_), 0); diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 6191e4b0a1..45dd7f08a6 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -96,7 +96,7 @@ MIR* AllocReplacementMIR(MIRGraph* mir_graph, MIR* invoke, MIR* move_return) { uint32_t GetInvokeReg(MIR* invoke, uint32_t arg) { DCHECK_LT(arg, invoke->dalvikInsn.vA); - DCHECK(!MIRGraph::IsPseudoMirOp(invoke->dalvikInsn.opcode)); + DCHECK(!MIR::DecodedInstruction::IsPseudoMirOp(invoke->dalvikInsn.opcode)); if (Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc) { return invoke->dalvikInsn.vC + arg; // Non-range invoke. } else { @@ -107,7 +107,7 @@ uint32_t GetInvokeReg(MIR* invoke, uint32_t arg) { bool WideArgIsInConsecutiveDalvikRegs(MIR* invoke, uint32_t arg) { DCHECK_LT(arg + 1, invoke->dalvikInsn.vA); - DCHECK(!MIRGraph::IsPseudoMirOp(invoke->dalvikInsn.opcode)); + DCHECK(!MIR::DecodedInstruction::IsPseudoMirOp(invoke->dalvikInsn.opcode)); return Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc || invoke->dalvikInsn.arg[arg + 1u] == invoke->dalvikInsn.arg[arg] + 1u; } diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index caadc0ad89..07c615f342 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -1185,7 +1185,7 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { work_half->meta.throw_insn = mir; } - if (MIRGraph::IsPseudoMirOp(opcode)) { + if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { HandleExtendedMethodMIR(bb, mir); continue; } diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 48855012c3..87509b636c 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -531,7 +531,7 @@ class Mir2Lir : public Backend { LIRSlowPath(Mir2Lir* m2l, const DexOffset dexpc, LIR* fromfast, LIR* cont = nullptr) : m2l_(m2l), cu_(m2l->cu_), current_dex_pc_(dexpc), fromfast_(fromfast), cont_(cont) { - m2l->StartSlowPath(cont); + m2l->StartSlowPath(this); } virtual ~LIRSlowPath() {} virtual void Compile() = 0; @@ -705,17 +705,17 @@ class Mir2Lir : public Backend { int AssignLiteralOffset(CodeOffset offset); int AssignSwitchTablesOffset(CodeOffset offset); int AssignFillArrayDataOffset(CodeOffset offset); - LIR* InsertCaseLabel(DexOffset vaddr, int keyVal); + virtual LIR* InsertCaseLabel(DexOffset vaddr, int keyVal); void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec); void MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec); - virtual void StartSlowPath(LIR *label) {} + virtual void StartSlowPath(LIRSlowPath* slowpath) {} virtual void BeginInvoke(CallInfo* info) {} virtual void EndInvoke(CallInfo* info) {} // Handle bookkeeping to convert a wide RegLocation to a narrow RegLocation. No code generated. 
- RegLocation NarrowRegLoc(RegLocation loc); + virtual RegLocation NarrowRegLoc(RegLocation loc); // Shared by all targets - implemented in local_optimizations.cc void ConvertMemOpIntoMove(LIR* orig_lir, RegStorage dest, RegStorage src); @@ -763,7 +763,7 @@ class Mir2Lir : public Backend { virtual bool IsTemp(RegStorage reg); bool IsPromoted(RegStorage reg); bool IsDirty(RegStorage reg); - void LockTemp(RegStorage reg); + virtual void LockTemp(RegStorage reg); void ResetDef(RegStorage reg); void NullifyRange(RegStorage reg, int s_reg); void MarkDef(RegLocation rl, LIR *start, LIR *finish); diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index 9000514856..8e2a1e3532 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -234,8 +234,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { NewLIR0(kPseudoMethodEntry); /* Spill core callee saves */ SpillCoreRegs(); - /* NOTE: promotion of FP regs currently unsupported, thus no FP spill */ - DCHECK_EQ(num_fp_spills_, 0); + SpillFPRegs(); if (!skip_overflow_check) { class StackOverflowSlowPath : public LIRSlowPath { public: @@ -309,6 +308,7 @@ void X86Mir2Lir::GenExitSequence() { NewLIR0(kPseudoMethodExit); UnSpillCoreRegs(); + UnSpillFPRegs(); /* Remove frame except for return address */ stack_increment_ = OpRegImm(kOpAdd, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set)); NewLIR0(kX86Ret); diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index ff7b30eeec..b0c54e86e9 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -319,6 +319,8 @@ class X86Mir2Lir : public Mir2Lir { void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset); void SpillCoreRegs(); void UnSpillCoreRegs(); + void UnSpillFPRegs(); + void SpillFPRegs(); static const X86EncodingMap EncodingMap[kX86Last]; bool InexpensiveConstantInt(int32_t value); bool InexpensiveConstantFloat(int32_t value); diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index e81f505f2f..1ebbbbd5ee 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -52,6 +52,13 @@ static constexpr RegStorage dp_regs_arr_64[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15 }; +static constexpr RegStorage xp_regs_arr_32[] = { + rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7, +}; +static constexpr RegStorage xp_regs_arr_64[] = { + rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7, + rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15 +}; static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32}; static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32}; static constexpr RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64}; @@ -60,6 +67,24 @@ static constexpr RegStorage core_temps_arr_64[] = { rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI, rs_r8, rs_r9, rs_r10, rs_r11 }; + +// How to add register to be available for promotion: +// 1) Remove register from array defining temp +// 2) Update ClobberCallerSave +// 3) Update JNI compiler ABI: +// 3.1) add reg in JniCallingConvention method +// 3.2) update CoreSpillMask/FpSpillMask +// 4) Update entrypoints +// 4.1) Update constants in asm_support_x86_64.h for new frame size +// 4.2) Remove entry in SmashCallerSaves +// 
4.3) Update jni_entrypoints to spill/unspill new callee save reg +// 4.4) Update quick_entrypoints to spill/unspill new callee save reg +// 5) Update runtime ABI +// 5.1) Update quick_method_frame_info with new required spills +// 5.2) Update QuickArgumentVisitor with new offsets to gprs and xmms +// Note that you cannot use register corresponding to incoming args +// according to ABI and QCG needs one additional XMM temp for +// bulk copy in preparation to call. static constexpr RegStorage core_temps_arr_64q[] = { rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q, rs_r8q, rs_r9q, rs_r10q, rs_r11q @@ -69,14 +94,14 @@ static constexpr RegStorage sp_temps_arr_32[] = { }; static constexpr RegStorage sp_temps_arr_64[] = { rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, - rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15 + rs_fr8, rs_fr9, rs_fr10, rs_fr11 }; static constexpr RegStorage dp_temps_arr_32[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, }; static constexpr RegStorage dp_temps_arr_64[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, - rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15 + rs_dr8, rs_dr9, rs_dr10, rs_dr11 }; static constexpr RegStorage xp_temps_arr_32[] = { @@ -84,7 +109,7 @@ static constexpr RegStorage xp_temps_arr_32[] = { }; static constexpr RegStorage xp_temps_arr_64[] = { rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7, - rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15 + rs_xr8, rs_xr9, rs_xr10, rs_xr11 }; static constexpr ArrayRef<const RegStorage> empty_pool; @@ -95,6 +120,8 @@ static constexpr ArrayRef<const RegStorage> sp_regs_32(sp_regs_arr_32); static constexpr ArrayRef<const RegStorage> sp_regs_64(sp_regs_arr_64); static constexpr ArrayRef<const RegStorage> dp_regs_32(dp_regs_arr_32); static constexpr ArrayRef<const RegStorage> dp_regs_64(dp_regs_arr_64); +static constexpr ArrayRef<const RegStorage> xp_regs_32(xp_regs_arr_32); +static constexpr ArrayRef<const RegStorage> xp_regs_64(xp_regs_arr_64); static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32); static constexpr ArrayRef<const RegStorage> reserved_regs_64(reserved_regs_arr_64); static constexpr ArrayRef<const RegStorage> reserved_regs_64q(reserved_regs_arr_64q); @@ -437,21 +464,13 @@ bool X86Mir2Lir::IsByteRegister(RegStorage reg) { /* Clobber all regs that might be used by an external C call */ void X86Mir2Lir::ClobberCallerSave() { - Clobber(rs_rAX); - Clobber(rs_rCX); - Clobber(rs_rDX); - Clobber(rs_rBX); - - Clobber(rs_fr0); - Clobber(rs_fr1); - Clobber(rs_fr2); - Clobber(rs_fr3); - Clobber(rs_fr4); - Clobber(rs_fr5); - Clobber(rs_fr6); - Clobber(rs_fr7); - if (cu_->target64) { + Clobber(rs_rAX); + Clobber(rs_rCX); + Clobber(rs_rDX); + Clobber(rs_rSI); + Clobber(rs_rDI); + Clobber(rs_r8); Clobber(rs_r9); Clobber(rs_r10); @@ -461,11 +480,21 @@ void X86Mir2Lir::ClobberCallerSave() { Clobber(rs_fr9); Clobber(rs_fr10); Clobber(rs_fr11); - Clobber(rs_fr12); - Clobber(rs_fr13); - Clobber(rs_fr14); - Clobber(rs_fr15); + } else { + Clobber(rs_rAX); + Clobber(rs_rCX); + Clobber(rs_rDX); + Clobber(rs_rBX); } + + Clobber(rs_fr0); + Clobber(rs_fr1); + Clobber(rs_fr2); + Clobber(rs_fr3); + Clobber(rs_fr4); + Clobber(rs_fr5); + Clobber(rs_fr6); + Clobber(rs_fr7); } RegLocation X86Mir2Lir::GetReturnWideAlt() { @@ -599,11 +628,15 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() { // Target-specific adjustments. // Add in XMM registers. 
- const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32; - for (RegStorage reg : *xp_temps) { + const ArrayRef<const RegStorage> *xp_regs = cu_->target64 ? &xp_regs_64 : &xp_regs_32; + for (RegStorage reg : *xp_regs) { RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg)); reginfo_map_.Put(reg.GetReg(), info); - info->SetIsTemp(true); + } + const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32; + for (RegStorage reg : *xp_temps) { + RegisterInfo* xp_reg_info = GetRegInfo(reg); + xp_reg_info->SetIsTemp(true); } // Alias single precision xmm to double xmms. @@ -665,9 +698,11 @@ void X86Mir2Lir::SpillCoreRegs() { // Spill mask not including fake return address register uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum()); int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); + OpSize size = cu_->target64 ? k64 : k32; for (int reg = 0; mask; mask >>= 1, reg++) { if (mask & 0x1) { - StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg)); + StoreBaseDisp(rs_rX86_SP, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg), + size, kNotVolatile); offset += GetInstructionSetPointerSize(cu_->instruction_set); } } @@ -680,14 +715,46 @@ void X86Mir2Lir::UnSpillCoreRegs() { // Spill mask not including fake return address register uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum()); int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); + OpSize size = cu_->target64 ? k64 : k32; for (int reg = 0; mask; mask >>= 1, reg++) { if (mask & 0x1) { - LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg)); + LoadBaseDisp(rs_rX86_SP, offset, cu_->target64 ? 
RegStorage::Solo64(reg) : RegStorage::Solo32(reg), + size, kNotVolatile); offset += GetInstructionSetPointerSize(cu_->instruction_set); } } } +void X86Mir2Lir::SpillFPRegs() { + if (num_fp_spills_ == 0) { + return; + } + uint32_t mask = fp_spill_mask_; + int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_)); + for (int reg = 0; mask; mask >>= 1, reg++) { + if (mask & 0x1) { + StoreBaseDisp(rs_rX86_SP, offset, RegStorage::FloatSolo64(reg), + k64, kNotVolatile); + offset += sizeof(double); + } + } +} +void X86Mir2Lir::UnSpillFPRegs() { + if (num_fp_spills_ == 0) { + return; + } + uint32_t mask = fp_spill_mask_; + int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_)); + for (int reg = 0; mask; mask >>= 1, reg++) { + if (mask & 0x1) { + LoadBaseDisp(rs_rX86_SP, offset, RegStorage::FloatSolo64(reg), + k64, kNotVolatile); + offset += sizeof(double); + } + } +} + + bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) { return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32); } diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index 657160ffd1..5c7c91b5b5 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -917,7 +917,7 @@ void X86Mir2Lir::AnalyzeBB(BasicBlock * bb) { for (MIR *mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { int opcode = mir->dalvikInsn.opcode; - if (MIRGraph::IsPseudoMirOp(opcode)) { + if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { AnalyzeExtendedMIR(opcode, bb, mir); } else { AnalyzeMIR(opcode, bb, mir); diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 2789923bb9..56573810ca 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -66,7 +66,9 @@ namespace art { * XMM6: caller | caller, arg7 | caller, scratch | caller, arg7, scratch * XMM7: caller | caller, arg8 | caller, scratch | caller, arg8, scratch * --- x86-64/x32 registers - * XMM8 .. 15: caller save available as scratch registers for ART. + * XMM8 .. 11: caller save available as scratch registers for ART. + * XMM12 .. 15: callee save available as promoted registers for ART. + * This change (XMM12..15) is for QCG only, for others they are caller save. * * X87 is a necessary evil outside of ART code for x86: * ST0: x86 float/double native return value, caller save diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc index db383c4d0b..892b30284f 100644 --- a/compiler/dex/vreg_analysis.cc +++ b/compiler/dex/vreg_analysis.cc @@ -251,7 +251,8 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { // Special-case handling for format 35c/3rc invokes Instruction::Code opcode = mir->dalvikInsn.opcode; - int flags = IsPseudoMirOp(opcode) ? 0 : Instruction::FlagsOf(mir->dalvikInsn.opcode); + int flags = MIR::DecodedInstruction::IsPseudoMirOp(opcode) ? 
+ 0 : Instruction::FlagsOf(mir->dalvikInsn.opcode); if ((flags & Instruction::kInvoke) && (attrs & (DF_FORMAT_35C | DF_FORMAT_3RC))) { DCHECK_EQ(next, 0); diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc index 5febed24fe..525f05c522 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc @@ -130,6 +130,10 @@ X86_64JniCallingConvention::X86_64JniCallingConvention(bool is_static, bool is_s callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R13)); callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R14)); callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R15)); + callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM12)); + callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM13)); + callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM14)); + callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM15)); } uint32_t X86_64JniCallingConvention::CoreSpillMask() const { @@ -137,6 +141,10 @@ uint32_t X86_64JniCallingConvention::CoreSpillMask() const { 1 << kNumberOfCpuRegisters; } +uint32_t X86_64JniCallingConvention::FpSpillMask() const { + return 1 << XMM12 | 1 << XMM13 | 1 << XMM14 | 1 << XMM15; +} + size_t X86_64JniCallingConvention::FrameSize() { // Method*, return address and callee save area size, local reference segment state size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) + diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h index 1ba5353289..7a90c6e94e 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h @@ -61,9 +61,7 @@ class X86_64JniCallingConvention FINAL : public JniCallingConvention { } ManagedRegister ReturnScratchRegister() const OVERRIDE; uint32_t CoreSpillMask() const OVERRIDE; - uint32_t FpSpillMask() const OVERRIDE { - return 0; - } + uint32_t FpSpillMask() const OVERRIDE; bool IsCurrentParamInRegister() OVERRIDE; bool IsCurrentParamOnStack() OVERRIDE; ManagedRegister CurrentParamRegister() OVERRIDE; diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 4d5d613015..78738d8934 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1671,16 +1671,31 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& spill_regs, const ManagedRegisterEntrySpills& entry_spills) { CHECK_ALIGNED(frame_size, kStackAlignment); + int gpr_count = 0; for (int i = spill_regs.size() - 1; i >= 0; --i) { - pushq(spill_regs.at(i).AsX86_64().AsCpuRegister()); + x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64(); + if (spill.IsCpuRegister()) { + pushq(spill.AsCpuRegister()); + gpr_count++; + } } // return address then method on stack - addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(frame_size) + (spill_regs.size() * kFramePointerSize) + - sizeof(StackReference<mirror::ArtMethod>) /*method*/ + - kFramePointerSize /*return address*/)); + int64_t rest_of_frame = static_cast<int64_t>(frame_size) + - (gpr_count * kFramePointerSize) + - kFramePointerSize /*return address*/; + subq(CpuRegister(RSP), Immediate(rest_of_frame)); + // spill xmms + int64_t offset = rest_of_frame; + for (int i = spill_regs.size() - 
1; i >= 0; --i) { + x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64(); + if (spill.IsXmmRegister()) { + offset -= sizeof(double); + movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister()); + } + } DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>)); - subq(CpuRegister(RSP), Immediate(4)); + movl(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister()); for (size_t i = 0; i < entry_spills.size(); ++i) { @@ -1707,9 +1722,24 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void X86_64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); - addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - kFramePointerSize)); + int gpr_count = 0; + // unspill xmms + int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize; for (size_t i = 0; i < spill_regs.size(); ++i) { - popq(spill_regs.at(i).AsX86_64().AsCpuRegister()); + x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64(); + if (spill.IsXmmRegister()) { + offset += sizeof(double); + movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset)); + } else { + gpr_count++; + } + } + addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize)); + for (size_t i = 0; i < spill_regs.size(); ++i) { + x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64(); + if (spill.IsCpuRegister()) { + popq(spill.AsCpuRegister()); + } } ret(); } diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index f7bad8b057..dc1758ffdf 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -246,11 +246,9 @@ std::string buildframe_test_fn(x86_64::X86_64Assembler* assembler) { str << "pushq %rsi\n"; str << "pushq %r10\n"; // 2) Move down the stack pointer. - ssize_t displacement = -static_cast<ssize_t>(frame_size) + spill_regs.size() * 8 + - sizeof(StackReference<mirror::ArtMethod>) + 8; - str << "addq $" << displacement << ", %rsp\n"; - // 3) Make space for method reference, and store it. - str << "subq $4, %rsp\n"; + ssize_t displacement = static_cast<ssize_t>(frame_size) - (spill_regs.size() * 8 + 8); + str << "subq $" << displacement << ", %rsp\n"; + // 3) Store method reference. str << "movl %edi, (%rsp)\n"; // 4) Entry spills. 
str << "movq %rax, " << frame_size + 0 << "(%rsp)\n"; diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h index bff8501cf2..05d0ef8761 100644 --- a/runtime/arch/x86_64/asm_support_x86_64.h +++ b/runtime/arch/x86_64/asm_support_x86_64.h @@ -35,9 +35,9 @@ // Offset of field Thread::thin_lock_thread_id_ verified in InitCpu #define THREAD_ID_OFFSET 12 -#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64 -#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64 -#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176 +#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64 + 4*8 +#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64 + 4*8 +#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176 + 4*8 // Expected size of a heap reference #define HEAP_REFERENCE_SIZE 4 diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc index e1f47ee3d4..7699eaf9d4 100644 --- a/runtime/arch/x86_64/context_x86_64.cc +++ b/runtime/arch/x86_64/context_x86_64.cc @@ -78,6 +78,18 @@ void X86_64Context::SmashCallerSaves() { gprs_[R9] = nullptr; gprs_[R10] = nullptr; gprs_[R11] = nullptr; + fprs_[XMM0] = nullptr; + fprs_[XMM1] = nullptr; + fprs_[XMM2] = nullptr; + fprs_[XMM3] = nullptr; + fprs_[XMM4] = nullptr; + fprs_[XMM5] = nullptr; + fprs_[XMM6] = nullptr; + fprs_[XMM7] = nullptr; + fprs_[XMM8] = nullptr; + fprs_[XMM9] = nullptr; + fprs_[XMM10] = nullptr; + fprs_[XMM11] = nullptr; } bool X86_64Context::SetGPR(uint32_t reg, uintptr_t value) { @@ -102,41 +114,26 @@ bool X86_64Context::SetFPR(uint32_t reg, uintptr_t value) { } } +extern "C" void art_quick_do_long_jump(uintptr_t*, uintptr_t*); + void X86_64Context::DoLongJump() { #if defined(__x86_64__) - // Array of GPR values, filled from the context backward for the long jump pop. We add a slot at - // the top for the stack pointer that doesn't get popped in a pop-all. - volatile uintptr_t gprs[kNumberOfCpuRegisters + 1]; + uintptr_t gprs[kNumberOfCpuRegisters + 1]; + uintptr_t fprs[kNumberOfFloatRegisters]; + for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) { gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != nullptr ? *gprs_[i] : X86_64Context::kBadGprBase + i; } + for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) { + fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : X86_64Context::kBadFprBase + i; + } + // We want to load the stack pointer one slot below so that the ret will pop eip. uintptr_t rsp = gprs[kNumberOfCpuRegisters - RSP - 1] - kWordSize; gprs[kNumberOfCpuRegisters] = rsp; *(reinterpret_cast<uintptr_t*>(rsp)) = rip_; - __asm__ __volatile__( - "movq %0, %%rsp\n\t" // RSP points to gprs. - "popq %%r15\n\t" // Load all registers except RSP and RIP with values in gprs. - "popq %%r14\n\t" - "popq %%r13\n\t" - "popq %%r12\n\t" - "popq %%r11\n\t" - "popq %%r10\n\t" - "popq %%r9\n\t" - "popq %%r8\n\t" - "popq %%rdi\n\t" - "popq %%rsi\n\t" - "popq %%rbp\n\t" - "addq $8, %%rsp\n\t" - "popq %%rbx\n\t" - "popq %%rdx\n\t" - "popq %%rcx\n\t" - "popq %%rax\n\t" - "popq %%rsp\n\t" // Load stack pointer. - "ret\n\t" // From higher in the stack pop rip. - : // output. - : "g"(&gprs[0]) // input. - :); // clobber. 
+ + art_quick_do_long_jump(gprs, fprs); #else UNIMPLEMENTED(FATAL); #endif diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc index 609d1c6500..204d52c723 100644 --- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc +++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc @@ -35,7 +35,7 @@ extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*); extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*); // Cast entrypoints. -extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass, +extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass, const mirror::Class* ref_class); extern "C" void art_quick_check_cast(void*, void*); @@ -129,7 +129,7 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, ResetQuickAllocEntryPoints(qpoints); // Cast - qpoints->pInstanceofNonTrivial = artIsAssignableFromCode; + qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code; qpoints->pCheckCast = art_quick_check_cast; // DexCache diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S index d668797ba4..f6736df11f 100644 --- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S @@ -28,8 +28,8 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub PUSH rdx // Arg. PUSH rcx // Arg. // Create space for FPR args, plus padding for alignment - subq LITERAL(72), %rsp - CFI_ADJUST_CFA_OFFSET(72) + subq LITERAL(72 + 4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(72 + 4 * 8) // Save FPRs. movq %xmm0, 0(%rsp) movq %xmm1, 8(%rsp) @@ -39,6 +39,10 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub movq %xmm5, 40(%rsp) movq %xmm6, 48(%rsp) movq %xmm7, 56(%rsp) + movq %xmm12, 64(%rsp) + movq %xmm13, 72(%rsp) + movq %xmm14, 80(%rsp) + movq %xmm15, 88(%rsp) // prepare call movq %gs:THREAD_SELF_OFFSET, %rdi // RDI := Thread::Current() // call @@ -52,8 +56,12 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub movq 40(%rsp), %xmm5 movq 48(%rsp), %xmm6 movq 56(%rsp), %xmm7 - addq LITERAL(72), %rsp - CFI_ADJUST_CFA_OFFSET(-72) + movq 64(%rsp), %xmm12 + movq 72(%rsp), %xmm13 + movq 80(%rsp), %xmm14 + movq 88(%rsp), %xmm15 + addq LITERAL(72 + 4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(-72 - 4 * 8) POP rcx // Arg. POP rdx // Arg. POP rsi // Arg. diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 8fa947c9b3..7f7226c0ad 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -16,6 +16,26 @@ #include "asm_support_x86_64.S" +MACRO0(SETUP_FP_CALLEE_SAVE_FRAME) + // Create space for ART FP callee-saved registers + subq LITERAL(4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(4 * 8) + movq %xmm12, 0(%rsp) + movq %xmm13, 8(%rsp) + movq %xmm14, 16(%rsp) + movq %xmm15, 24(%rsp) +END_MACRO + +MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME) + // Restore ART FP callee-saved registers + movq 0(%rsp), %xmm12 + movq 8(%rsp), %xmm13 + movq 16(%rsp), %xmm14 + movq 24(%rsp), %xmm15 + addq LITERAL(4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(- 4 * 8) +END_MACRO + // For x86, the CFA is esp+4, the address above the pushed return address on the stack. /* @@ -37,6 +57,14 @@ MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME) PUSH r12 // Callee save. PUSH rbp // Callee save. PUSH rbx // Callee save. + // Create space for FPR args, plus padding for alignment + subq LITERAL(4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(4 * 8) + // Save FPRs. 
+ movq %xmm12, 0(%rsp) + movq %xmm13, 8(%rsp) + movq %xmm14, 16(%rsp) + movq %xmm15, 24(%rsp) subq MACRO_LITERAL(8), %rsp // Space for Method* (also aligns the frame). CFI_ADJUST_CFA_OFFSET(8) // R10 := ArtMethod* for save all callee save frame method. @@ -46,7 +74,7 @@ MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME) // Ugly compile-time check, but we only have the preprocessor. // Last +8: implicit return address pushed on stack when caller made call. -#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 8 + 8) +#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8) #error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected." #endif #endif // __APPLE__ @@ -71,8 +99,14 @@ MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME) PUSH r12 // Callee save. PUSH rbp // Callee save. PUSH rbx // Callee save. - subq MACRO_LITERAL(8), %rsp // Space for Method* (also aligns the frame). - CFI_ADJUST_CFA_OFFSET(8) + // Create space for FPR args, plus padding for alignment + subq LITERAL(8 + 4*8), %rsp + CFI_ADJUST_CFA_OFFSET(8 + 4*8) + // Save FPRs. + movq %xmm12, 8(%rsp) + movq %xmm13, 16(%rsp) + movq %xmm14, 24(%rsp) + movq %xmm15, 32(%rsp) // R10 := ArtMethod* for refs only callee save frame method. movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10 // Store ArtMethod* to bottom of stack. @@ -80,15 +114,19 @@ MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME) // Ugly compile-time check, but we only have the preprocessor. // Last +8: implicit return address pushed on stack when caller made call. -#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 8 + 8) +#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8) #error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected." #endif #endif // __APPLE__ END_MACRO MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME) - addq MACRO_LITERAL(8), %rsp - CFI_ADJUST_CFA_OFFSET(-8) + movq 8(%rsp), %xmm12 + movq 16(%rsp), %xmm13 + movq 24(%rsp), %xmm14 + movq 32(%rsp), %xmm15 + addq LITERAL(8 + 4*8), %rsp + CFI_ADJUST_CFA_OFFSET(-8 - 4*8) // TODO: optimize by not restoring callee-saves restored by the ABI POP rbx POP rbp @@ -123,8 +161,8 @@ MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME) PUSH rdx // Quick arg 2. PUSH rcx // Quick arg 3. // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*. - subq MACRO_LITERAL(80), %rsp - CFI_ADJUST_CFA_OFFSET(80) + subq MACRO_LITERAL(80 + 4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(80 + 4 * 8) // R10 := ArtMethod* for ref and args callee save frame method. movq RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10 // Save FPRs. @@ -136,12 +174,16 @@ MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME) movq %xmm5, 56(%rsp) movq %xmm6, 64(%rsp) movq %xmm7, 72(%rsp) + movq %xmm12, 80(%rsp) + movq %xmm13, 88(%rsp) + movq %xmm14, 96(%rsp) + movq %xmm15, 104(%rsp) // Store ArtMethod* to bottom of stack. movq %r10, 0(%rsp) // Ugly compile-time check, but we only have the preprocessor. // Last +8: implicit return address pushed on stack when caller made call. -#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 80 + 8) +#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 4*8 + 80 + 8) #error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected." 
#endif #endif // __APPLE__ @@ -157,8 +199,12 @@ MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME) movq 56(%rsp), %xmm5 movq 64(%rsp), %xmm6 movq 72(%rsp), %xmm7 - addq MACRO_LITERAL(80), %rsp - CFI_ADJUST_CFA_OFFSET(-80) + movq 80(%rsp), %xmm12 + movq 88(%rsp), %xmm13 + movq 96(%rsp), %xmm14 + movq 104(%rsp), %xmm15 + addq MACRO_LITERAL(80 + 4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8)) // Restore callee and GPR args, mixed together to agree with core spills bitmap. POP rcx POP rdx @@ -536,6 +582,58 @@ DEFINE_FUNCTION art_quick_invoke_static_stub #endif // __APPLE__ END_FUNCTION art_quick_invoke_static_stub + /* + * Long jump stub. + * On entry: + * rdi = gprs + * rsi = fprs + */ +DEFINE_FUNCTION art_quick_do_long_jump +#if defined(__APPLE__) + int3 + int3 +#else + // Restore FPRs. + movq 0(%rsi), %xmm0 + movq 8(%rsi), %xmm1 + movq 16(%rsi), %xmm2 + movq 24(%rsi), %xmm3 + movq 32(%rsi), %xmm4 + movq 40(%rsi), %xmm5 + movq 48(%rsi), %xmm6 + movq 56(%rsi), %xmm7 + movq 64(%rsi), %xmm8 + movq 72(%rsi), %xmm9 + movq 80(%rsi), %xmm10 + movq 88(%rsi), %xmm11 + movq 96(%rsi), %xmm12 + movq 104(%rsi), %xmm13 + movq 112(%rsi), %xmm14 + movq 120(%rsi), %xmm15 + // Restore FPRs. + movq %rdi, %rsp // RSP points to gprs. + // Load all registers except RSP and RIP with values in gprs. + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rdi + popq %rsi + popq %rbp + addq LITERAL(8), %rsp // Skip rsp + popq %rbx + popq %rdx + popq %rcx + popq %rax + popq %rsp // Load stack pointer. + ret // From higher in the stack pop rip. +#endif // __APPLE__ +END_FUNCTION art_quick_do_long_jump + MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro) DEFINE_FUNCTION VAR(c_name, 0) SETUP_REF_ONLY_CALLEE_SAVE_FRAME // save ref containing registers for GC @@ -820,13 +918,17 @@ END_FUNCTION art_quick_unlock_object DEFINE_FUNCTION art_quick_check_cast PUSH rdi // Save args for exc PUSH rsi + SETUP_FP_CALLEE_SAVE_FRAME call PLT_SYMBOL(artIsAssignableFromCode) // (Class* klass, Class* ref_klass) testq %rax, %rax jz 1f // jump forward if not assignable + RESTORE_FP_CALLEE_SAVE_FRAME addq LITERAL(16), %rsp // pop arguments CFI_ADJUST_CFA_OFFSET(-16) + ret 1: + RESTORE_FP_CALLEE_SAVE_FRAME POP rsi // Pop arguments POP rdi SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // save all registers as basis for long jump context @@ -907,6 +1009,7 @@ DEFINE_FUNCTION art_quick_aput_obj PUSH rdx subq LITERAL(8), %rsp // Alignment padding. CFI_ADJUST_CFA_OFFSET(8) + SETUP_FP_CALLEE_SAVE_FRAME // "Uncompress" = do nothing, as already zero-extended on load. movl CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class. @@ -918,6 +1021,7 @@ DEFINE_FUNCTION art_quick_aput_obj testq %rax, %rax jz .Lthrow_array_store_exception + RESTORE_FP_CALLEE_SAVE_FRAME // Restore arguments. addq LITERAL(8), %rsp CFI_ADJUST_CFA_OFFSET(-8) @@ -934,6 +1038,7 @@ DEFINE_FUNCTION art_quick_aput_obj // movb %dl, (%rdx, %rdi) ret .Lthrow_array_store_exception: + RESTORE_FP_CALLEE_SAVE_FRAME // Restore arguments. addq LITERAL(8), %rsp CFI_ADJUST_CFA_OFFSET(-8) @@ -1012,8 +1117,8 @@ DEFINE_FUNCTION art_quick_proxy_invoke_handler PUSH rdx // Quick arg 2. PUSH rcx // Quick arg 3. // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*. - subq LITERAL(80), %rsp - CFI_ADJUST_CFA_OFFSET(80) + subq LITERAL(80 + 4*8), %rsp + CFI_ADJUST_CFA_OFFSET(80 + 4*8) // Save FPRs. 
movq %xmm0, 16(%rsp) movq %xmm1, 24(%rsp) @@ -1023,14 +1128,18 @@ DEFINE_FUNCTION art_quick_proxy_invoke_handler movq %xmm5, 56(%rsp) movq %xmm6, 64(%rsp) movq %xmm7, 72(%rsp) + movq %xmm12, 80(%rsp) + movq %xmm13, 88(%rsp) + movq %xmm14, 96(%rsp) + movq %xmm15, 104(%rsp) // Store proxy method to bottom of stack. movq %rdi, 0(%rsp) movq %gs:THREAD_SELF_OFFSET, %rdx // Pass Thread::Current(). movq %rsp, %rcx // Pass SP. call PLT_SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP) movq %rax, %xmm0 // Copy return value in case of float returns. - addq LITERAL(168), %rsp // Pop arguments. - CFI_ADJUST_CFA_OFFSET(-168) + addq LITERAL(168 + 4*8), %rsp // Pop arguments. + CFI_ADJUST_CFA_OFFSET(-168 - 4*8) RETURN_OR_DELIVER_PENDING_EXCEPTION END_FUNCTION art_quick_proxy_invoke_handler @@ -1156,8 +1265,8 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline PUSH rdx // Quick arg 2. PUSH rcx // Quick arg 3. // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*. - subq LITERAL(80), %rsp - CFI_ADJUST_CFA_OFFSET(80) + subq LITERAL(80 + 4*8), %rsp + CFI_ADJUST_CFA_OFFSET(80 + 4*8) // Save FPRs. movq %xmm0, 16(%rsp) movq %xmm1, 24(%rsp) @@ -1167,6 +1276,10 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline movq %xmm5, 56(%rsp) movq %xmm6, 64(%rsp) movq %xmm7, 72(%rsp) + movq %xmm12, 80(%rsp) + movq %xmm13, 88(%rsp) + movq %xmm14, 96(%rsp) + movq %xmm15, 104(%rsp) movq %rdi, 0(%rsp) // Store native ArtMethod* to bottom of stack. movq %rsp, %rbp // save SP at (old) callee-save frame CFI_DEF_CFA_REGISTER(rbp) @@ -1260,9 +1373,13 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline movq 56(%rsp), %xmm5 movq 64(%rsp), %xmm6 movq 72(%rsp), %xmm7 + movq 80(%rsp), %xmm12 + movq 88(%rsp), %xmm13 + movq 96(%rsp), %xmm14 + movq 104(%rsp), %xmm15 // was 80 bytes - addq LITERAL(80), %rsp - CFI_ADJUST_CFA_OFFSET(-80) + addq LITERAL(80 + 4*8), %rsp + CFI_ADJUST_CFA_OFFSET(-80 - 4*8) // Save callee and GPR args, mixed together to agree with core spills bitmap. POP rcx // Arg. POP rdx // Arg. @@ -1292,9 +1409,13 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline movq 56(%rsp), %xmm5 movq 64(%rsp), %xmm6 movq 72(%rsp), %xmm7 - // was 80 bytes - addq LITERAL(80), %rsp - CFI_ADJUST_CFA_OFFSET(-80) + movq 80(%rsp), %xmm12 + movq 88(%rsp), %xmm13 + movq 96(%rsp), %xmm14 + movq 104(%rsp), %xmm15 + // was 80 + 32 bytes + addq LITERAL(80 + 4*8), %rsp + CFI_ADJUST_CFA_OFFSET(-80 - 4*8) // Save callee and GPR args, mixed together to agree with core spills bitmap. POP rcx // Arg. POP rdx // Arg. 
@@ -1450,3 +1571,10 @@ DEFINE_FUNCTION art_quick_string_compareto END_FUNCTION art_quick_string_compareto UNIMPLEMENTED art_quick_memcmp16 + +DEFINE_FUNCTION art_quick_assignable_from_code + SETUP_FP_CALLEE_SAVE_FRAME + call PLT_SYMBOL(artIsAssignableFromCode) // (const mirror::Class*, const mirror::Class*) + RESTORE_FP_CALLEE_SAVE_FRAME + ret +END_FUNCTION art_quick_assignable_from_code diff --git a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h index 618390903b..53aa212a88 100644 --- a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h +++ b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h @@ -34,6 +34,9 @@ static constexpr uint32_t kX86_64CalleeSaveFpArgSpills = (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | (1 << art::x86_64::XMM2) | (1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) | (1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7); +static constexpr uint32_t kX86_64CalleeSaveFpSpills = + (1 << art::x86_64::XMM12) | (1 << art::x86_64::XMM13) | + (1 << art::x86_64::XMM14) | (1 << art::x86_64::XMM15); constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) { return kX86_64CalleeSaveRefSpills | @@ -42,7 +45,8 @@ constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) { } constexpr uint32_t X86_64CalleeSaveFpSpills(Runtime::CalleeSaveType type) { - return (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0); + return kX86_64CalleeSaveFpSpills | + (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0); } constexpr uint32_t X86_64CalleeSaveFrameSize(Runtime::CalleeSaveType type) { diff --git a/runtime/arch/x86_64/registers_x86_64.cc b/runtime/arch/x86_64/registers_x86_64.cc index 38f3494502..f29c42652b 100644 --- a/runtime/arch/x86_64/registers_x86_64.cc +++ b/runtime/arch/x86_64/registers_x86_64.cc @@ -34,5 +34,14 @@ std::ostream& operator<<(std::ostream& os, const Register& rhs) { return os; } +std::ostream& operator<<(std::ostream& os, const FloatRegister& rhs) { + if (rhs >= XMM0 && rhs <= XMM15) { + os << "xmm" << static_cast<int>(rhs); + } else { + os << "Register[" << static_cast<int>(rhs) << "]"; + } + return os; +} + } // namespace x86_64 } // namespace art diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc index d161d0b9ed..2edcb78be3 100644 --- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc @@ -32,10 +32,15 @@ extern "C" const void* artInstrumentationMethodEntryFromCode(mirror::ArtMethod* SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs); instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); - const void* result = instrumentation->GetQuickCodeFor(method); + const void* result; + if (instrumentation->IsDeoptimized(method)) { + result = GetQuickToInterpreterBridge(); + } else { + result = instrumentation->GetQuickCodeFor(method); + } DCHECK(result != GetQuickToInterpreterBridgeTrampoline(Runtime::Current()->GetClassLinker())); bool interpreter_entry = (result == GetQuickToInterpreterBridge()); - instrumentation->PushInstrumentationStackFrame(self, method->IsStatic() ? NULL : this_object, + instrumentation->PushInstrumentationStackFrame(self, method->IsStatic() ? 
nullptr : this_object, method, lr, interpreter_entry); CHECK(result != NULL) << PrettyMethod(method); return result; diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 95cb85eefc..2a66f2fe3b 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -175,8 +175,8 @@ class QuickArgumentVisitor { static constexpr size_t kNumQuickGprArgs = 5; // 5 arguments passed in GPRs. static constexpr size_t kNumQuickFprArgs = 8; // 8 arguments passed in FPRs. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16; // Offset of first FPR arg. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80; // Offset of first GPR arg. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 168; // Offset of return address. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80 + 4*8; // Offset of first GPR arg. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 168 + 4*8; // Offset of return address. static size_t GprIndexToGprOffset(uint32_t gpr_index) { switch (gpr_index) { case 0: return (4 * GetBytesPerGprSpillLocation(kRuntimeISA)); diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc index 722576f164..c66e80d2f1 100644 --- a/runtime/gc/allocator/rosalloc.cc +++ b/runtime/gc/allocator/rosalloc.cc @@ -2112,30 +2112,40 @@ size_t RosAlloc::ReleasePages() { // result in occasionally not releasing pages which we could release. byte pm = page_map_[i]; switch (pm) { + case kPageMapReleased: + // Fall through. case kPageMapEmpty: { - // Only lock if we have an empty page since we want to prevent other threads racing in. + // This is currently the start of a free page run. + // Acquire the lock to prevent other threads racing in and modifying the page map. MutexLock mu(self, lock_); // Check that it's still empty after we acquired the lock since another thread could have // raced in and placed an allocation here. - pm = page_map_[i]; - if (LIKELY(pm == kPageMapEmpty)) { - // The start of a free page run. Release pages. + if (IsFreePage(i)) { + // Free page runs can start with a released page if we coalesced a released page free + // page run with an empty page run. FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize); - DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end()); - size_t fpr_size = fpr->ByteSize(this); - DCHECK(IsAligned<kPageSize>(fpr_size)); - byte* start = reinterpret_cast<byte*>(fpr); - reclaimed_bytes += ReleasePageRange(start, start + fpr_size); - i += fpr_size / kPageSize; - DCHECK_LE(i, page_map_size_); + // There is a race condition where FreePage can coalesce fpr with the previous + // free page run before we acquire lock_. In that case free_page_runs_.find will not find + // a run starting at fpr. To handle this race, we skip reclaiming the page range and go + // to the next page. + if (free_page_runs_.find(fpr) != free_page_runs_.end()) { + size_t fpr_size = fpr->ByteSize(this); + DCHECK(IsAligned<kPageSize>(fpr_size)); + byte* start = reinterpret_cast<byte*>(fpr); + reclaimed_bytes += ReleasePageRange(start, start + fpr_size); + size_t pages = fpr_size / kPageSize; + CHECK_GT(pages, 0U) << "Infinite loop probable"; + i += pages; + DCHECK_LE(i, page_map_size_); + break; + } } - break; + // Fall through. } case kPageMapLargeObject: // Fall through. 
case kPageMapLargeObjectPart: // Fall through. case kPageMapRun: // Fall through. case kPageMapRunPart: // Fall through. - case kPageMapReleased: // Fall through since it is already released. ++i; break; // Skip. default: @@ -2175,6 +2185,34 @@ size_t RosAlloc::ReleasePageRange(byte* start, byte* end) { return reclaimed_bytes; } +void RosAlloc::LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) { + Thread* self = Thread::Current(); + size_t largest_continuous_free_pages = 0; + WriterMutexLock wmu(self, bulk_free_lock_); + MutexLock mu(self, lock_); + for (FreePageRun* fpr : free_page_runs_) { + largest_continuous_free_pages = std::max(largest_continuous_free_pages, + fpr->ByteSize(this)); + } + if (failed_alloc_bytes > kLargeSizeThreshold) { + // Large allocation. + size_t required_bytes = RoundUp(failed_alloc_bytes, kPageSize); + if (required_bytes > largest_continuous_free_pages) { + os << "; failed due to fragmentation (required continguous free " + << required_bytes << " bytes where largest contiguous free " + << largest_continuous_free_pages << " bytes)"; + } + } else { + // Non-large allocation. + size_t required_bytes = numOfPages[SizeToIndex(failed_alloc_bytes)] * kPageSize; + if (required_bytes > largest_continuous_free_pages) { + os << "; failed due to fragmentation (required continguous free " + << required_bytes << " bytes for a new buffer where largest contiguous free " + << largest_continuous_free_pages << " bytes)"; + } + } +} + } // namespace allocator } // namespace gc } // namespace art diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h index fad0dc888e..85a8225807 100644 --- a/runtime/gc/allocator/rosalloc.h +++ b/runtime/gc/allocator/rosalloc.h @@ -590,6 +590,8 @@ class RosAlloc { // Verify for debugging. void Verify() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); + + void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes); }; } // namespace allocator diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index e9adca07c6..19715e9331 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -805,37 +805,23 @@ space::ImageSpace* Heap::GetImageSpace() const { return NULL; } -static void MSpaceChunkCallback(void* start, void* end, size_t used_bytes, void* arg) { - size_t chunk_size = reinterpret_cast<uint8_t*>(end) - reinterpret_cast<uint8_t*>(start); - if (used_bytes < chunk_size) { - size_t chunk_free_bytes = chunk_size - used_bytes; - size_t& max_contiguous_allocation = *reinterpret_cast<size_t*>(arg); - max_contiguous_allocation = std::max(max_contiguous_allocation, chunk_free_bytes); - } -} - -void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation) { +void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) { std::ostringstream oss; size_t total_bytes_free = GetFreeMemory(); oss << "Failed to allocate a " << byte_count << " byte allocation with " << total_bytes_free << " free bytes"; // If the allocation failed due to fragmentation, print out the largest continuous allocation. - if (!large_object_allocation && total_bytes_free >= byte_count) { - size_t max_contiguous_allocation = 0; - for (const auto& space : continuous_spaces_) { - if (space->IsMallocSpace()) { - // To allow the Walk/InspectAll() to exclusively-lock the mutator - // lock, temporarily release the shared access to the mutator - // lock here by transitioning to the suspended state. 
- Locks::mutator_lock_->AssertSharedHeld(self); - self->TransitionFromRunnableToSuspended(kSuspended); - space->AsMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation); - self->TransitionFromSuspendedToRunnable(); - Locks::mutator_lock_->AssertSharedHeld(self); - } + if (allocator_type != kAllocatorTypeLOS && total_bytes_free >= byte_count) { + space::MallocSpace* space = nullptr; + if (allocator_type == kAllocatorTypeNonMoving) { + space = non_moving_space_; + } else if (allocator_type == kAllocatorTypeRosAlloc || + allocator_type == kAllocatorTypeDlMalloc) { + space = main_space_; + } + if (space != nullptr) { + space->LogFragmentationAllocFailure(oss, byte_count); } - oss << "; failed due to fragmentation (largest possible contiguous allocation " - << max_contiguous_allocation << " bytes)"; } self->ThrowOutOfMemoryError(oss.str().c_str()); } @@ -1188,7 +1174,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat } ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size); if (ptr == nullptr) { - ThrowOutOfMemoryError(self, alloc_size, allocator == kAllocatorTypeLOS); + ThrowOutOfMemoryError(self, alloc_size, allocator); } return ptr; } diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index c9ea03e45c..86dab21008 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -120,7 +120,7 @@ class Heap { static constexpr size_t kDefaultStartingSize = kPageSize; static constexpr size_t kDefaultInitialSize = 2 * MB; - static constexpr size_t kDefaultMaximumSize = 32 * MB; + static constexpr size_t kDefaultMaximumSize = 256 * MB; static constexpr size_t kDefaultMaxFree = 2 * MB; static constexpr size_t kDefaultMinFree = kDefaultMaxFree / 4; static constexpr size_t kDefaultLongPauseLogThreshold = MsToNs(5); @@ -194,7 +194,6 @@ class Heap { void CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void ThrowOutOfMemoryError(size_t byte_count, bool large_object_allocation); void RegisterNativeAllocation(JNIEnv* env, int bytes); void RegisterNativeFree(JNIEnv* env, int bytes); @@ -628,7 +627,7 @@ class Heap { size_t* usable_size) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation) + void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); template <bool kGrow> diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc index 5123e4787d..456d1b31e2 100644 --- a/runtime/gc/space/dlmalloc_space.cc +++ b/runtime/gc/space/dlmalloc_space.cc @@ -304,6 +304,30 @@ void DlMallocSpace::CheckMoreCoreForPrecondition() { } #endif +static void MSpaceChunkCallback(void* start, void* end, size_t used_bytes, void* arg) { + size_t chunk_size = reinterpret_cast<uint8_t*>(end) - reinterpret_cast<uint8_t*>(start); + if (used_bytes < chunk_size) { + size_t chunk_free_bytes = chunk_size - used_bytes; + size_t& max_contiguous_allocation = *reinterpret_cast<size_t*>(arg); + max_contiguous_allocation = std::max(max_contiguous_allocation, chunk_free_bytes); + } +} + +void DlMallocSpace::LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) { + Thread* self = Thread::Current(); + size_t max_contiguous_allocation = 0; + // To allow the Walk/InspectAll() to exclusively-lock the mutator + // lock, temporarily release the shared access to the mutator + // lock here by transitioning to 
the suspended state. + Locks::mutator_lock_->AssertSharedHeld(self); + self->TransitionFromRunnableToSuspended(kSuspended); + Walk(MSpaceChunkCallback, &max_contiguous_allocation); + self->TransitionFromSuspendedToRunnable(); + Locks::mutator_lock_->AssertSharedHeld(self); + os << "; failed due to fragmentation (largest possible contiguous allocation " + << max_contiguous_allocation << " bytes)"; +} + } // namespace space } // namespace gc } // namespace art diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h index accd26bd21..7aff14b665 100644 --- a/runtime/gc/space/dlmalloc_space.h +++ b/runtime/gc/space/dlmalloc_space.h @@ -124,6 +124,9 @@ class DlMallocSpace : public MallocSpace { return this; } + void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) OVERRIDE + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + protected: DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin, byte* end, byte* limit, size_t growth_limit, bool can_move_objects, size_t starting_size, diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h index d24016cb18..6f49fbf203 100644 --- a/runtime/gc/space/malloc_space.h +++ b/runtime/gc/space/malloc_space.h @@ -19,6 +19,7 @@ #include "space.h" +#include <iostream> #include <valgrind.h> #include <memcheck/memcheck.h> @@ -132,6 +133,8 @@ class MallocSpace : public ContinuousMemMapAllocSpace { return can_move_objects_; } + virtual void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) = 0; + protected: MallocSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end, byte* limit, size_t growth_limit, bool create_bitmaps, bool can_move_objects, diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h index 2934af87c6..f50530576b 100644 --- a/runtime/gc/space/rosalloc_space.h +++ b/runtime/gc/space/rosalloc_space.h @@ -120,6 +120,10 @@ class RosAllocSpace : public MallocSpace { virtual ~RosAllocSpace(); + void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) OVERRIDE { + rosalloc_->LogFragmentationAllocFailure(os, failed_alloc_bytes); + } + protected: RosAllocSpace(const std::string& name, MemMap* mem_map, allocator::RosAlloc* rosalloc, byte* begin, byte* end, byte* limit, size_t growth_limit, bool can_move_objects, diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc index f459b590eb..0e05b62dde 100644 --- a/runtime/instrumentation.cc +++ b/runtime/instrumentation.cc @@ -146,16 +146,13 @@ void Instrumentation::InstallStubsForMethod(mirror::ArtMethod* method) { // class, all its static methods code will be set to the instrumentation entry point. // For more details, see ClassLinker::FixupStaticTrampolines. if (is_class_initialized || !method->IsStatic() || method->IsConstructor()) { - // Do not overwrite interpreter to prevent from posting method entry/exit events twice. - new_portable_code = class_linker->GetPortableOatCodeFor(method, &have_portable_code); - new_quick_code = class_linker->GetQuickOatCodeFor(method); - DCHECK(new_quick_code != GetQuickToInterpreterBridgeTrampoline(class_linker)); - if (entry_exit_stubs_installed_ && new_quick_code != GetQuickToInterpreterBridge()) { - // TODO: portable to quick bridge. Bug: 8196384. We cannot enable the check below as long - // as GetPortableToQuickBridge() == GetPortableToInterpreterBridge(). 
- // DCHECK(new_portable_code != GetPortableToInterpreterBridge()); + if (entry_exit_stubs_installed_) { new_portable_code = GetPortableToInterpreterBridge(); new_quick_code = GetQuickInstrumentationEntryPoint(); + } else { + new_portable_code = class_linker->GetPortableOatCodeFor(method, &have_portable_code); + new_quick_code = class_linker->GetQuickOatCodeFor(method); + DCHECK(new_quick_code != GetQuickToInterpreterBridgeTrampoline(class_linker)); } } else { new_portable_code = GetPortableResolutionTrampoline(class_linker); @@ -175,7 +172,6 @@ static void InstrumentationInstallStack(Thread* thread, void* arg) struct InstallStackVisitor : public StackVisitor { InstallStackVisitor(Thread* thread, Context* context, uintptr_t instrumentation_exit_pc) : StackVisitor(thread, context), instrumentation_stack_(thread->GetInstrumentationStack()), - existing_instrumentation_frames_count_(instrumentation_stack_->size()), instrumentation_exit_pc_(instrumentation_exit_pc), reached_existing_instrumentation_frames_(false), instrumentation_stack_depth_(0), last_return_pc_(0) { @@ -190,18 +186,10 @@ static void InstrumentationInstallStack(Thread* thread, void* arg) last_return_pc_ = 0; return true; // Ignore upcalls. } - if (m->IsRuntimeMethod()) { - if (kVerboseInstrumentation) { - LOG(INFO) << " Skipping runtime method. Frame " << GetFrameId(); - } - last_return_pc_ = GetReturnPc(); - return true; // Ignore unresolved methods since they will be instrumented after resolution. - } - if (kVerboseInstrumentation) { - LOG(INFO) << " Installing exit stub in " << DescribeLocation(); - } if (GetCurrentQuickFrame() == NULL) { - InstrumentationStackFrame instrumentation_frame(GetThisObject(), m, 0, GetFrameId(), false); + bool interpreter_frame = !m->IsPortableCompiled(); + InstrumentationStackFrame instrumentation_frame(GetThisObject(), m, 0, GetFrameId(), + interpreter_frame); if (kVerboseInstrumentation) { LOG(INFO) << "Pushing shadow frame " << instrumentation_frame.Dump(); } @@ -209,6 +197,32 @@ static void InstrumentationInstallStack(Thread* thread, void* arg) return true; // Continue. } uintptr_t return_pc = GetReturnPc(); + if (m->IsRuntimeMethod()) { + if (return_pc == instrumentation_exit_pc_) { + if (kVerboseInstrumentation) { + LOG(INFO) << " Handling quick to interpreter transition. Frame " << GetFrameId(); + } + CHECK_LT(instrumentation_stack_depth_, instrumentation_stack_->size()); + const InstrumentationStackFrame& frame = instrumentation_stack_->at(instrumentation_stack_depth_); + CHECK(frame.interpreter_entry_); + // This is an interpreter frame so method enter event must have been reported. However we + // need to push a DEX pc into the dex_pcs_ list to match size of instrumentation stack. + // Since we won't report method entry here, we can safely push any DEX pc. + dex_pcs_.push_back(0); + last_return_pc_ = frame.return_pc_; + ++instrumentation_stack_depth_; + return true; + } else { + if (kVerboseInstrumentation) { + LOG(INFO) << " Skipping runtime method. Frame " << GetFrameId(); + } + last_return_pc_ = GetReturnPc(); + return true; // Ignore unresolved methods since they will be instrumented after resolution. + } + } + if (kVerboseInstrumentation) { + LOG(INFO) << " Installing exit stub in " << DescribeLocation(); + } if (return_pc == instrumentation_exit_pc_) { // We've reached a frame which has already been installed with instrumentation exit stub. // We should have already installed instrumentation on previous frames. 
@@ -231,8 +245,15 @@ static void InstrumentationInstallStack(Thread* thread, void* arg) LOG(INFO) << "Pushing frame " << instrumentation_frame.Dump(); } - // Insert frame before old ones so we do not corrupt the instrumentation stack. - auto it = instrumentation_stack_->end() - existing_instrumentation_frames_count_; + // Insert frame at the right position so we do not corrupt the instrumentation stack. + // Instrumentation stack frames are in descending frame id order. + auto it = instrumentation_stack_->begin(); + for (auto end = instrumentation_stack_->end(); it != end; ++it) { + const InstrumentationStackFrame& current = *it; + if (instrumentation_frame.frame_id_ >= current.frame_id_) { + break; + } + } instrumentation_stack_->insert(it, instrumentation_frame); SetReturnPc(instrumentation_exit_pc_); } @@ -243,7 +264,6 @@ static void InstrumentationInstallStack(Thread* thread, void* arg) } std::deque<InstrumentationStackFrame>* const instrumentation_stack_; std::vector<InstrumentationStackFrame> shadow_stack_; - const size_t existing_instrumentation_frames_count_; std::vector<uint32_t> dex_pcs_; const uintptr_t instrumentation_exit_pc_; bool reached_existing_instrumentation_frames_; @@ -275,7 +295,9 @@ static void InstrumentationInstallStack(Thread* thread, void* arg) } uint32_t dex_pc = visitor.dex_pcs_.back(); visitor.dex_pcs_.pop_back(); - instrumentation->MethodEnterEvent(thread, (*isi).this_object_, (*isi).method_, dex_pc); + if (!isi->interpreter_entry_) { + instrumentation->MethodEnterEvent(thread, (*isi).this_object_, (*isi).method_, dex_pc); + } } } thread->VerifyStack(); @@ -606,7 +628,7 @@ void Instrumentation::Deoptimize(mirror::ArtMethod* method) { CHECK(!already_deoptimized) << "Method " << PrettyMethod(method) << " is already deoptimized"; if (!interpreter_stubs_installed_) { - UpdateEntrypoints(method, GetQuickToInterpreterBridge(), GetPortableToInterpreterBridge(), + UpdateEntrypoints(method, GetQuickInstrumentationEntryPoint(), GetPortableToInterpreterBridge(), false); // Install instrumentation exit stub and instrumentation frames. We may already have installed @@ -844,7 +866,9 @@ void Instrumentation::PushInstrumentationStackFrame(Thread* self, mirror::Object frame_id, interpreter_entry); stack->push_front(instrumentation_frame); - MethodEnterEvent(self, this_object, method, 0); + if (!interpreter_entry) { + MethodEnterEvent(self, this_object, method, 0); + } } TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc, @@ -875,7 +899,9 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, uintpt // return_pc. uint32_t dex_pc = DexFile::kDexNoIndex; mirror::Object* this_object = instrumentation_frame.this_object_; - MethodExitEvent(self, this_object, instrumentation_frame.method_, dex_pc, return_value); + if (!instrumentation_frame.interpreter_entry_) { + MethodExitEvent(self, this_object, instrumentation_frame.method_, dex_pc, return_value); + } // Deoptimize if the caller needs to continue execution in the interpreter. Do nothing if we get // back to an upcall. 
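
The instrumentation.cc hunk above stops inserting new frames at a pre-counted offset from the end of the deque and instead walks it, keeping entries in descending frame-id order. A minimal standalone sketch of that ordering policy (an illustrative struct stands in for InstrumentationStackFrame):

#include <cstdint>
#include <deque>

struct FrameSketch {        // Stand-in for InstrumentationStackFrame; only the id matters here.
  uint64_t frame_id;
};

// Frames are kept in descending frame_id order, so a new frame goes in front of
// the first existing entry whose id is <= its own (or at the end of the deque).
void InsertByDescendingFrameId(std::deque<FrameSketch>* stack, const FrameSketch& frame) {
  auto it = stack->begin();
  for (auto end = stack->end(); it != end; ++it) {
    if (frame.frame_id >= it->frame_id) {
      break;
    }
  }
  stack->insert(it, frame);
}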
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc index 9cc144149b..2db62f8ead 100644 --- a/runtime/interpreter/interpreter.cc +++ b/runtime/interpreter/interpreter.cc @@ -95,9 +95,11 @@ static void UnstartedRuntimeJni(Thread* self, ArtMethod* method, jint newValue = args[4]; bool success; if (Runtime::Current()->IsActiveTransaction()) { - success = obj->CasField32<true>(MemberOffset(offset), expectedValue, newValue); + success = obj->CasFieldWeakSequentiallyConsistent32<true>(MemberOffset(offset), + expectedValue, newValue); } else { - success = obj->CasField32<false>(MemberOffset(offset), expectedValue, newValue); + success = obj->CasFieldWeakSequentiallyConsistent32<false>(MemberOffset(offset), + expectedValue, newValue); } result->SetZ(success ? JNI_TRUE : JNI_FALSE); } else if (name == "void sun.misc.Unsafe.putObject(java.lang.Object, long, java.lang.Object)") { diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h index 62c1162b13..089ef57310 100644 --- a/runtime/mirror/object-inl.h +++ b/runtime/mirror/object-inl.h @@ -69,10 +69,10 @@ inline void Object::SetLockWord(LockWord new_val, bool as_volatile) { } } -inline bool Object::CasLockWord(LockWord old_val, LockWord new_val) { +inline bool Object::CasLockWordWeakSequentiallyConsistent(LockWord old_val, LockWord new_val) { // Force use of non-transactional mode and do not check. - return CasField32<false, false>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), - new_val.GetValue()); + return CasFieldWeakSequentiallyConsistent32<false, false>( + OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue()); } inline uint32_t Object::GetLockOwnerThreadId() { @@ -131,21 +131,17 @@ inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* DCHECK(kUseBakerOrBrooksReadBarrier); MemberOffset offset = OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_); byte* raw_addr = reinterpret_cast<byte*>(this) + offset.SizeValue(); - HeapReference<Object>* ref = reinterpret_cast<HeapReference<Object>*>(raw_addr); + Atomic<uint32_t>* atomic_rb_ptr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr); HeapReference<Object> expected_ref(HeapReference<Object>::FromMirrorPtr(expected_rb_ptr)); HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(rb_ptr)); - uint32_t expected_val = expected_ref.reference_; - uint32_t new_val; do { - uint32_t old_val = ref->reference_; - if (old_val != expected_val) { + if (UNLIKELY(atomic_rb_ptr->LoadRelaxed() != expected_ref.reference_)) { // Lost the race. 
return false; } - new_val = new_ref.reference_; - } while (!__sync_bool_compare_and_swap( - reinterpret_cast<uint32_t*>(raw_addr), expected_val, new_val)); - DCHECK_EQ(new_val, ref->reference_); + } while (!atomic_rb_ptr->CompareExchangeWeakSequentiallyConsistent(expected_ref.reference_, + new_ref.reference_)); + DCHECK_EQ(new_ref.reference_, atomic_rb_ptr->LoadRelaxed()); return true; #else LOG(FATAL) << "Unreachable"; @@ -448,7 +444,8 @@ inline void Object::SetField32Volatile(MemberOffset field_offset, int32_t new_va } template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags> -inline bool Object::CasField32(MemberOffset field_offset, int32_t old_value, int32_t new_value) { +inline bool Object::CasFieldWeakSequentiallyConsistent32(MemberOffset field_offset, + int32_t old_value, int32_t new_value) { if (kCheckTransaction) { DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction()); } @@ -459,9 +456,9 @@ inline bool Object::CasField32(MemberOffset field_offset, int32_t old_value, int VerifyObject(this); } byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value(); - volatile int32_t* addr = reinterpret_cast<volatile int32_t*>(raw_addr); + AtomicInteger* atomic_addr = reinterpret_cast<AtomicInteger*>(raw_addr); - return __sync_bool_compare_and_swap(addr, old_value, new_value); + return atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_value, new_value); } template<VerifyObjectFlags kVerifyFlags, bool kIsVolatile> @@ -513,7 +510,8 @@ inline void Object::SetField64Volatile(MemberOffset field_offset, int64_t new_va } template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags> -inline bool Object::CasField64(MemberOffset field_offset, int64_t old_value, int64_t new_value) { +inline bool Object::CasFieldWeakSequentiallyConsistent64(MemberOffset field_offset, + int64_t old_value, int64_t new_value) { if (kCheckTransaction) { DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction()); } @@ -524,8 +522,8 @@ inline bool Object::CasField64(MemberOffset field_offset, int64_t old_value, int VerifyObject(this); } byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value(); - volatile int64_t* addr = reinterpret_cast<volatile int64_t*>(raw_addr); - return QuasiAtomic::Cas64(old_value, new_value, addr); + Atomic<int64_t>* atomic_addr = reinterpret_cast<Atomic<int64_t>*>(raw_addr); + return atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_value, new_value); } template<class T, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, @@ -615,8 +613,8 @@ inline HeapReference<Object>* Object::GetFieldObjectReferenceAddr(MemberOffset f } template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags> -inline bool Object::CasFieldObject(MemberOffset field_offset, Object* old_value, - Object* new_value) { +inline bool Object::CasFieldWeakSequentiallyConsistentObject(MemberOffset field_offset, + Object* old_value, Object* new_value) { if (kCheckTransaction) { DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction()); } @@ -632,11 +630,14 @@ inline bool Object::CasFieldObject(MemberOffset field_offset, Object* old_value, if (kTransactionActive) { Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true); } - byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value(); - volatile int32_t* addr = reinterpret_cast<volatile int32_t*>(raw_addr); HeapReference<Object> 
old_ref(HeapReference<Object>::FromMirrorPtr(old_value)); HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_value)); - bool success = __sync_bool_compare_and_swap(addr, old_ref.reference_, new_ref.reference_); + byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value(); + Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr); + + bool success = atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_ref.reference_, + new_ref.reference_); + if (success) { Runtime::Current()->GetHeap()->WriteBarrierField(this, field_offset, new_value); } diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc index 422a88b688..e58091fe09 100644 --- a/runtime/mirror/object.cc +++ b/runtime/mirror/object.cc @@ -156,7 +156,7 @@ int32_t Object::IdentityHashCode() const { // loop iteration. LockWord hash_word(LockWord::FromHashCode(GenerateIdentityHashCode())); DCHECK_EQ(hash_word.GetState(), LockWord::kHashCode); - if (const_cast<Object*>(this)->CasLockWord(lw, hash_word)) { + if (const_cast<Object*>(this)->CasLockWordWeakSequentiallyConsistent(lw, hash_word)) { return hash_word.GetHashCode(); } break; diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h index c082443ad9..d29011a4b5 100644 --- a/runtime/mirror/object.h +++ b/runtime/mirror/object.h @@ -110,7 +110,8 @@ class MANAGED LOCKABLE Object { // have C++11 "strong" semantics. // TODO: In most, possibly all, cases, these assumptions are too strong. // Confirm and weaken the implementation. - bool CasLockWord(LockWord old_val, LockWord new_val) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + bool CasLockWordWeakSequentiallyConsistent(LockWord old_val, LockWord new_val) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); uint32_t GetLockOwnerThreadId(); mirror::Object* MonitorEnter(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) @@ -226,7 +227,8 @@ class MANAGED LOCKABLE Object { template<bool kTransactionActive, bool kCheckTransaction = true, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> - bool CasFieldObject(MemberOffset field_offset, Object* old_value, Object* new_value) + bool CasFieldWeakSequentiallyConsistentObject(MemberOffset field_offset, Object* old_value, + Object* new_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> @@ -252,7 +254,8 @@ class MANAGED LOCKABLE Object { template<bool kTransactionActive, bool kCheckTransaction = true, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> - bool CasField32(MemberOffset field_offset, int32_t old_value, int32_t new_value) ALWAYS_INLINE + bool CasFieldWeakSequentiallyConsistent32(MemberOffset field_offset, int32_t old_value, + int32_t new_value) ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false> @@ -275,7 +278,8 @@ class MANAGED LOCKABLE Object { template<bool kTransactionActive, bool kCheckTransaction = true, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> - bool CasField64(MemberOffset field_offset, int64_t old_value, int64_t new_value) + bool CasFieldWeakSequentiallyConsistent64(MemberOffset field_offset, int64_t old_value, + int64_t new_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); template<bool kTransactionActive, bool kCheckTransaction = true, diff --git a/runtime/monitor.cc b/runtime/monitor.cc index c3ec38d1d3..5633a77b6f 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -163,7 +163,7 @@ bool Monitor::Install(Thread* 
self) { } LockWord fat(this); // Publish the updated lock word, which may race with other threads. - bool success = GetObject()->CasLockWord(lw, fat); + bool success = GetObject()->CasLockWordWeakSequentiallyConsistent(lw, fat); // Lock profiling. if (success && owner_ != nullptr && lock_profiling_threshold_ != 0) { locking_method_ = owner_->GetCurrentMethod(&locking_dex_pc_); @@ -722,7 +722,7 @@ mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj) { switch (lock_word.GetState()) { case LockWord::kUnlocked: { LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0)); - if (h_obj->CasLockWord(lock_word, thin_locked)) { + if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) { // CasLockWord enforces more than the acquire ordering we need here. return h_obj.Get(); // Success! } diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc index d23cfff0ce..7cc4cac83f 100644 --- a/runtime/native/sun_misc_Unsafe.cc +++ b/runtime/native/sun_misc_Unsafe.cc @@ -28,7 +28,8 @@ static jboolean Unsafe_compareAndSwapInt(JNIEnv* env, jobject, jobject javaObj, ScopedFastNativeObjectAccess soa(env); mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj); // JNI must use non transactional mode. - bool success = obj->CasField32<false>(MemberOffset(offset), expectedValue, newValue); + bool success = obj->CasFieldWeakSequentiallyConsistent32<false>(MemberOffset(offset), + expectedValue, newValue); return success ? JNI_TRUE : JNI_FALSE; } @@ -37,7 +38,8 @@ static jboolean Unsafe_compareAndSwapLong(JNIEnv* env, jobject, jobject javaObj, ScopedFastNativeObjectAccess soa(env); mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj); // JNI must use non transactional mode. - bool success = obj->CasField64<false>(MemberOffset(offset), expectedValue, newValue); + bool success = obj->CasFieldWeakSequentiallyConsistent64<false>(MemberOffset(offset), + expectedValue, newValue); return success ? JNI_TRUE : JNI_FALSE; } @@ -48,7 +50,8 @@ static jboolean Unsafe_compareAndSwapObject(JNIEnv* env, jobject, jobject javaOb mirror::Object* expectedValue = soa.Decode<mirror::Object*>(javaExpectedValue); mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue); // JNI must use non transactional mode. - bool success = obj->CasFieldObject<false>(MemberOffset(offset), expectedValue, newValue); + bool success = obj->CasFieldWeakSequentiallyConsistentObject<false>(MemberOffset(offset), + expectedValue, newValue); return success ? JNI_TRUE : JNI_FALSE; } |
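
For the x86-64 callee-save frames earlier in the diff, every literal grows by 4 * 8 = 32 bytes because XMM12-XMM15 are now spilled alongside the existing slots. A rough constexpr cross-check of the REFS_AND_ARGS layout follows; the split of the original 80 bytes into a Method* slot, padding, and the eight FP argument spills is an inference from the assembly above, not a constant taken from the tree.

#include <cstddef>

constexpr size_t kMethodAndPadding = 16;      // ArtMethod* slot + alignment padding (Fpr1Offset stays 16).
constexpr size_t kFpArgSpills      = 8 * 8;   // XMM0-XMM7 argument registers.
constexpr size_t kFpCalleeSaves    = 4 * 8;   // XMM12-XMM15, newly saved by this change.
constexpr size_t kGprSpills        = 11 * 8;  // Callee-save and argument GPRs pushed by the macro.
constexpr size_t kReturnAddress    = 8;       // Pushed implicitly by the caller.

constexpr size_t kGpr1Offset = kMethodAndPadding + kFpArgSpills + kFpCalleeSaves;
constexpr size_t kLrOffset   = kGpr1Offset + kGprSpills;
constexpr size_t kFrameSize  = kLrOffset + kReturnAddress;

static_assert(kGpr1Offset == 80 + 4 * 8, "matches kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset");
static_assert(kLrOffset   == 168 + 4 * 8, "matches kQuickCalleeSaveFrame_RefAndArgs_LrOffset");
static_assert(kFrameSize  == 11 * 8 + 4 * 8 + 80 + 8, "matches FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE");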
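
The allocator hunks above (RosAlloc::LogFragmentationAllocFailure, DlMallocSpace::LogFragmentationAllocFailure, and the reworked Heap::ThrowOutOfMemoryError) all answer the same question: did the request fail because no single free region is large enough? A condensed sketch of that check; the free-run scan, the page rounding, and the per-bracket page count are simplified stand-ins for the real bookkeeping.

#include <algorithm>
#include <cstddef>
#include <ostream>
#include <vector>

// free_runs: byte sizes of the free page runs; bracket_pages: pages a new run
// for this size bracket would need (RosAlloc's numOfPages[SizeToIndex(...)]).
void LogFragmentationSketch(std::ostream& os, size_t failed_alloc_bytes,
                            const std::vector<size_t>& free_runs,
                            size_t page_size, size_t large_threshold,
                            size_t bracket_pages) {
  size_t largest_free = 0;
  for (size_t run : free_runs) {
    largest_free = std::max(largest_free, run);
  }
  size_t required = failed_alloc_bytes > large_threshold
                        ? ((failed_alloc_bytes + page_size - 1) / page_size) * page_size
                        : bracket_pages * page_size;
  if (required > largest_free) {
    os << "; failed due to fragmentation (required contiguous free " << required
       << " bytes where largest contiguous free " << largest_free << " bytes)";
  }
}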
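
The CasField*/CasLockWord renames that close out the diff make the memory-ordering contract explicit: these are weak, sequentially consistent compare-and-swaps. With std::atomic standing in for art::Atomic, the sketch below shows the two calling styles the runtime now relies on: one-shot callers such as the sun.misc.Unsafe intrinsics simply report failure, while AtomicSetReadBarrierPointer-style callers retry until they either succeed or observe a genuinely different value.

#include <atomic>
#include <cstdint>

// One attempt: a spurious weak-CAS failure simply surfaces as false (JNI_FALSE).
bool CasOnce(std::atomic<int32_t>& field, int32_t expected, int32_t desired) {
  return field.compare_exchange_weak(expected, desired, std::memory_order_seq_cst);
}

// Retry spurious failures, but report a real race (another thread changed the field).
bool CasRetryingSpuriousFailures(std::atomic<uint32_t>& field, uint32_t expected,
                                 uint32_t desired) {
  uint32_t observed;
  do {
    if (field.load(std::memory_order_relaxed) != expected) {
      return false;  // Lost the race.
    }
    observed = expected;  // compare_exchange_weak rewrites its first argument on failure.
  } while (!field.compare_exchange_weak(observed, desired, std::memory_order_seq_cst));
  return true;
}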