54 files changed, 1676 insertions, 1068 deletions
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 4c19ba0b4c..cf703a03da 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -115,6 +115,7 @@ RUNTIME_GTEST_COMMON_SRC_FILES := \ runtime/gc/space/rosalloc_space_static_test.cc \ runtime/gc/space/rosalloc_space_random_test.cc \ runtime/gc/space/large_object_space_test.cc \ + runtime/gc/task_processor_test.cc \ runtime/gtest_test.cc \ runtime/handle_scope_test.cc \ runtime/indenter_test.cc \ diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc index 6a610ab201..e5358139d8 100644 --- a/compiler/dex/bb_optimizations.cc +++ b/compiler/dex/bb_optimizations.cc @@ -51,20 +51,4 @@ bool BBCombine::Worker(PassDataHolder* data) const { return false; } -/* - * BasicBlock Optimization pass implementation start. - */ -void BBOptimizations::Start(PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; - DCHECK(c_unit != nullptr); - /* - * This pass has a different ordering depEnding on the suppress exception, - * so do the pass here for now: - * - Later, the Start should just change the ordering and we can move the extended - * creation into the pass driver's main job with a new iterator - */ - c_unit->mir_graph->BasicBlockOptimization(); -} - } // namespace art diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h index 0407e323cb..b07a415d4a 100644 --- a/compiler/dex/bb_optimizations.h +++ b/compiler/dex/bb_optimizations.h @@ -284,7 +284,8 @@ class BBCombine : public PassME { */ class BBOptimizations : public PassME { public: - BBOptimizations() : PassME("BBOptimizations", kNoNodes, "5_post_bbo_cfg") { + BBOptimizations() + : PassME("BBOptimizations", kNoNodes, kOptimizationBasicBlockChange, "5_post_bbo_cfg") { } bool Gate(const PassDataHolder* data) const { @@ -294,7 +295,28 @@ class BBOptimizations : public PassME { return ((c_unit->disable_opt & (1 << kBBOpt)) == 0); } - void Start(PassDataHolder* data) const; + void Start(PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph->BasicBlockOptimizationStart(); + + /* + * This pass has a different ordering depending on the suppress exception, + * so do the pass here for now: + * - Later, the Start should just change the ordering and we can move the extended + * creation into the pass driver's main job with a new iterator + */ + c_unit->mir_graph->BasicBlockOptimization(); + } + + void End(PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph->BasicBlockOptimizationEnd(); + down_cast<PassMEDataHolder*>(data)->dirty = !c_unit->mir_graph->DfsOrdersUpToDate(); + } }; /** diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 7ff06a04cb..7edb490176 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -555,7 +555,7 @@ std::ostream& operator<<(std::ostream& os, const DividePattern& pattern); * The current recipe is as follows: * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store. * -# Use AnyAny barrier after volatile store. (StoreLoad is as expensive.) - * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrierafter each volatile load. 
+ * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load. * -# Use StoreStore barrier after all stores but before return from any constructor whose * class has final fields. * -# Use NTStoreStore to order non-temporal stores with respect to all later diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc index 7e3b4d8adf..18e346968a 100644 --- a/compiler/dex/global_value_numbering_test.cc +++ b/compiler/dex/global_value_numbering_test.cc @@ -215,7 +215,6 @@ class GlobalValueNumberingTest : public testing::Test { bb->data_flow_info->live_in_v = live_in_v_; } } - cu_.mir_graph->num_blocks_ = count; ASSERT_EQ(count, cu_.mir_graph->block_list_.size()); cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1]; ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type); diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc index 7b53b14909..0f0846c74c 100644 --- a/compiler/dex/mir_analysis.cc +++ b/compiler/dex/mir_analysis.cc @@ -1151,7 +1151,7 @@ bool MIRGraph::SkipCompilation(std::string* skip_message) { skip_compilation = true; *skip_message = "Huge method: " + std::to_string(GetNumDalvikInsns()); // If we're got a huge number of basic blocks, don't bother with further analysis. - if (static_cast<size_t>(num_blocks_) > (compiler_options.GetHugeMethodThreshold() / 2)) { + if (static_cast<size_t>(GetNumBlocks()) > (compiler_options.GetHugeMethodThreshold() / 2)) { return true; } } else if (compiler_options.IsLargeMethod(GetNumDalvikInsns()) && diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 71ad635ac4..312a6ebcd6 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -91,6 +91,9 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) num_reachable_blocks_(0), max_num_reachable_blocks_(0), dfs_orders_up_to_date_(false), + domination_up_to_date_(false), + mir_ssa_rep_up_to_date_(false), + topological_order_up_to_date_(false), dfs_order_(arena->Adapter(kArenaAllocDfsPreOrder)), dfs_post_order_(arena->Adapter(kArenaAllocDfsPostOrder)), dom_post_order_traversal_(arena->Adapter(kArenaAllocDomPostOrder)), @@ -105,7 +108,6 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) try_block_addr_(NULL), entry_block_(NULL), exit_block_(NULL), - num_blocks_(0), current_code_item_(NULL), dex_pc_to_block_map_(arena->Adapter()), m_units_(arena->Adapter()), @@ -691,7 +693,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ if (current_method_ == 0) { DCHECK(entry_block_ == NULL); DCHECK(exit_block_ == NULL); - DCHECK_EQ(num_blocks_, 0U); + DCHECK_EQ(GetNumBlocks(), 0U); // Use id 0 to represent a null block. BasicBlock* null_block = CreateNewBB(kNullBlock); DCHECK_EQ(null_block->id, NullBasicBlockId); @@ -1740,6 +1742,9 @@ void MIRGraph::SSATransformationEnd() { // Update the maximum number of reachable blocks. max_num_reachable_blocks_ = num_reachable_blocks_; + + // Mark MIR SSA representations as up to date. 
+ mir_ssa_rep_up_to_date_ = true; } size_t MIRGraph::GetNumDalvikInsns() const { @@ -2005,6 +2010,7 @@ void MIRGraph::ComputeTopologicalSortOrder() { topological_order_loop_head_stack_.clear(); topological_order_loop_head_stack_.reserve(max_nested_loops); max_nested_loops_ = max_nested_loops; + topological_order_up_to_date_ = true; } bool BasicBlock::IsExceptionBlock() const { @@ -2246,12 +2252,6 @@ void BasicBlock::Kill(MIRGraph* mir_graph) { } predecessors.clear(); - KillUnreachable(mir_graph); -} - -void BasicBlock::KillUnreachable(MIRGraph* mir_graph) { - DCHECK(predecessors.empty()); // Unreachable. - // Mark as dead and hidden. block_type = kDead; hidden = true; @@ -2270,9 +2270,6 @@ void BasicBlock::KillUnreachable(MIRGraph* mir_graph) { ChildBlockIterator iter(this, mir_graph); for (BasicBlock* succ_bb = iter.Next(); succ_bb != nullptr; succ_bb = iter.Next()) { succ_bb->ErasePredecessor(id); - if (succ_bb->predecessors.empty()) { - succ_bb->KillUnreachable(mir_graph); - } } // Remove links to children. @@ -2393,7 +2390,8 @@ void BasicBlock::UpdatePredecessor(BasicBlockId old_pred, BasicBlockId new_pred) // Create a new basic block with block_id as num_blocks_ that is // post-incremented. BasicBlock* MIRGraph::CreateNewBB(BBType block_type) { - BasicBlock* res = NewMemBB(block_type, num_blocks_++); + BasicBlockId id = static_cast<BasicBlockId>(block_list_.size()); + BasicBlock* res = NewMemBB(block_type, id); block_list_.push_back(res); return res; } @@ -2403,10 +2401,6 @@ void MIRGraph::CalculateBasicBlockInformation() { driver.Launch(); } -void MIRGraph::InitializeBasicBlockData() { - num_blocks_ = block_list_.size(); -} - int MIR::DecodedInstruction::FlagsOf() const { // Calculate new index. int idx = static_cast<int>(opcode) - kNumPackedOpcodes; diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 851ca150b5..af97f51975 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -410,18 +410,12 @@ class BasicBlock : public DeletableArenaObject<kArenaAllocBB> { /** * @brief Kill the BasicBlock. - * @details Unlink predecessors to make this block unreachable, then KillUnreachable(). + * @details Unlink predecessors and successors, remove all MIRs, set the block type to kDead + * and set hidden to true. */ void Kill(MIRGraph* mir_graph); /** - * @brief Kill the unreachable block and all blocks that become unreachable by killing this one. - * @details Set the block type to kDead and set hidden to true, remove all MIRs, - * unlink all successors and recursively kill successors that become unreachable. - */ - void KillUnreachable(MIRGraph* mir_graph); - - /** * @brief Is ssa_reg the last SSA definition of that VR in the block? 
*/ bool IsSSALiveOut(const CompilationUnit* c_unit, int ssa_reg); @@ -574,7 +568,7 @@ class MIRGraph { } unsigned int GetNumBlocks() const { - return num_blocks_; + return block_list_.size(); } /** @@ -704,7 +698,9 @@ class MIRGraph { void DumpRegLocTable(RegLocation* table, int count); + void BasicBlockOptimizationStart(); void BasicBlockOptimization(); + void BasicBlockOptimizationEnd(); const ArenaVector<BasicBlockId>& GetTopologicalSortOrder() { DCHECK(!topological_order_.empty()); @@ -1198,7 +1194,6 @@ class MIRGraph { void AllocateSSAUseData(MIR *mir, int num_uses); void AllocateSSADefData(MIR *mir, int num_defs); void CalculateBasicBlockInformation(); - void InitializeBasicBlockData(); void ComputeDFSOrders(); void ComputeDefBlockMatrix(); void ComputeDominators(); @@ -1211,6 +1206,18 @@ class MIRGraph { return dfs_orders_up_to_date_; } + bool DominationUpToDate() const { + return domination_up_to_date_; + } + + bool MirSsaRepUpToDate() const { + return mir_ssa_rep_up_to_date_; + } + + bool TopologicalOrderUpToDate() const { + return topological_order_up_to_date_; + } + /* * IsDebugBuild sanity check: keep track of the Dex PCs for catch entries so that later on * we can verify that all catch entries have native PC entries. @@ -1321,6 +1328,9 @@ class MIRGraph { unsigned int num_reachable_blocks_; unsigned int max_num_reachable_blocks_; bool dfs_orders_up_to_date_; + bool domination_up_to_date_; + bool mir_ssa_rep_up_to_date_; + bool topological_order_up_to_date_; ArenaVector<BasicBlockId> dfs_order_; ArenaVector<BasicBlockId> dfs_post_order_; ArenaVector<BasicBlockId> dom_post_order_traversal_; @@ -1379,7 +1389,6 @@ class MIRGraph { ArenaBitVector* try_block_addr_; BasicBlock* entry_block_; BasicBlock* exit_block_; - unsigned int num_blocks_; const DexFile::CodeItem* current_code_item_; ArenaVector<uint16_t> dex_pc_to_block_map_; // FindBlock lookup cache. ArenaVector<DexCompilationUnit*> m_units_; // List of methods included in this graph diff --git a/compiler/dex/mir_graph_test.cc b/compiler/dex/mir_graph_test.cc index a96cd84297..8a7e71f4af 100644 --- a/compiler/dex/mir_graph_test.cc +++ b/compiler/dex/mir_graph_test.cc @@ -89,7 +89,6 @@ class TopologicalSortOrderTest : public testing::Test { cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo)); } } - cu_.mir_graph->num_blocks_ = count; ASSERT_EQ(count, cu_.mir_graph->block_list_.size()); cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1]; ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type); diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 6e9844cb7f..15b83413b7 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -485,9 +485,11 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { mir->ssa_rep->num_uses = 0; BasicBlock* successor_to_unlink = GetBasicBlock(edge_to_kill); successor_to_unlink->ErasePredecessor(bb->id); - if (successor_to_unlink->predecessors.empty()) { - successor_to_unlink->KillUnreachable(this); - } + // We have changed the graph structure. + dfs_orders_up_to_date_ = false; + domination_up_to_date_ = false; + topological_order_up_to_date_ = false; + // Keep MIR SSA rep, the worst that can happen is a Phi with just 1 input. } break; case Instruction::CMPL_FLOAT: @@ -649,36 +651,36 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { * Phi node only contains our two cases as input, we will use the result * SSA name of the Phi node as our select result and delete the Phi. 
If * the Phi node has more than two operands, we will arbitrarily use the SSA - * name of the "true" path, delete the SSA name of the "false" path from the + * name of the "false" path, delete the SSA name of the "true" path from the * Phi node (and fix up the incoming arc list). */ if (phi->ssa_rep->num_uses == 2) { mir->ssa_rep->defs[0] = phi->ssa_rep->defs[0]; - phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); + // Rather than changing the Phi to kMirOpNop, remove it completely. + // This avoids leaving other Phis after kMirOpNop (i.e. a non-Phi) insn. + tk_tk->RemoveMIR(phi); + int dead_false_def = if_false->ssa_rep->defs[0]; + raw_use_counts_[dead_false_def] = use_counts_[dead_false_def] = 0; } else { - int dead_def = if_false->ssa_rep->defs[0]; - int live_def = if_true->ssa_rep->defs[0]; + int live_def = if_false->ssa_rep->defs[0]; mir->ssa_rep->defs[0] = live_def; - BasicBlockId* incoming = phi->meta.phi_incoming; - for (int i = 0; i < phi->ssa_rep->num_uses; i++) { - if (phi->ssa_rep->uses[i] == live_def) { - incoming[i] = bb->id; - } - } - for (int i = 0; i < phi->ssa_rep->num_uses; i++) { - if (phi->ssa_rep->uses[i] == dead_def) { - int last_slot = phi->ssa_rep->num_uses - 1; - phi->ssa_rep->uses[i] = phi->ssa_rep->uses[last_slot]; - incoming[i] = incoming[last_slot]; - } - } - } - phi->ssa_rep->num_uses--; - bb->taken = NullBasicBlockId; - tk->block_type = kDead; - for (MIR* tmir = ft->first_mir_insn; tmir != NULL; tmir = tmir->next) { - tmir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); } + int dead_true_def = if_true->ssa_rep->defs[0]; + raw_use_counts_[dead_true_def] = use_counts_[dead_true_def] = 0; + // We want to remove ft and tk and link bb directly to ft_ft. First, we need + // to update all Phi inputs correctly with UpdatePredecessor(ft->id, bb->id) + // since the live_def above comes from ft->first_mir_insn (if_false). + DCHECK(if_false == ft->first_mir_insn); + ft_ft->UpdatePredecessor(ft->id, bb->id); + // Correct the rest of the links between bb, ft and ft_ft. + ft->ErasePredecessor(bb->id); + ft->fall_through = NullBasicBlockId; + bb->fall_through = ft_ft->id; + // Now we can kill tk and ft. + tk->Kill(this); + ft->Kill(this); + // NOTE: DFS order, domination info and topological order are still usable + // despite the newly dead blocks. } } } @@ -788,43 +790,9 @@ void MIRGraph::CombineBlocks(class BasicBlock* bb) { MIR* mir = bb->last_mir_insn; DCHECK(bb->first_mir_insn != nullptr); - // Grab the attributes from the paired opcode. + // Get the paired insn and check if it can still throw. MIR* throw_insn = mir->meta.throw_insn; - uint64_t df_attributes = GetDataFlowAttributes(throw_insn); - - // Don't combine if the throw_insn can still throw NPE. - if ((df_attributes & DF_HAS_NULL_CHKS) != 0 && - (throw_insn->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0) { - break; - } - // Now whitelist specific instructions. - bool ok = false; - if ((df_attributes & DF_IFIELD) != 0) { - // Combine only if fast, otherwise weird things can happen. - const MirIFieldLoweringInfo& field_info = GetIFieldLoweringInfo(throw_insn); - ok = (df_attributes & DF_DA) ? field_info.FastGet() : field_info.FastPut(); - } else if ((df_attributes & DF_SFIELD) != 0) { - // Combine only if fast, otherwise weird things can happen. - const MirSFieldLoweringInfo& field_info = GetSFieldLoweringInfo(throw_insn); - bool fast = ((df_attributes & DF_DA) ? field_info.FastGet() : field_info.FastPut()); - // Don't combine if the SGET/SPUT can call <clinit>(). 
- bool clinit = !field_info.IsClassInitialized() && - (throw_insn->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0; - ok = fast && !clinit; - } else if ((df_attributes & DF_HAS_RANGE_CHKS) != 0) { - // Only AGET/APUT have range checks. We have processed the AGET/APUT null check above. - DCHECK_NE(throw_insn->optimization_flags & MIR_IGNORE_NULL_CHECK, 0); - ok = ((throw_insn->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0); - } else if ((throw_insn->dalvikInsn.FlagsOf() & Instruction::kThrow) == 0) { - // We can encounter a non-throwing insn here thanks to inlining or other optimizations. - ok = true; - } else if (throw_insn->dalvikInsn.opcode == Instruction::ARRAY_LENGTH || - throw_insn->dalvikInsn.opcode == Instruction::FILL_ARRAY_DATA || - static_cast<int>(throw_insn->dalvikInsn.opcode) == kMirOpNullCheck) { - // No more checks for these (null check was processed above). - ok = true; - } - if (!ok) { + if (CanThrow(throw_insn)) { break; } @@ -863,9 +831,6 @@ void MIRGraph::CombineBlocks(class BasicBlock* bb) { BasicBlock* succ_bb = GetBasicBlock(succ_info->block); DCHECK(succ_bb->catch_entry); succ_bb->ErasePredecessor(bb->id); - if (succ_bb->predecessors.empty()) { - succ_bb->KillUnreachable(this); - } } } } @@ -908,8 +873,10 @@ void MIRGraph::CombineBlocks(class BasicBlock* bb) { child->UpdatePredecessor(bb_next->id, bb->id); } - // DFS orders are not up to date anymore. + // DFS orders, domination and topological order are not up to date anymore. dfs_orders_up_to_date_ = false; + domination_up_to_date_ = false; + topological_order_up_to_date_ = false; // Now, loop back and see if we can keep going } @@ -1581,7 +1548,7 @@ bool MIRGraph::BuildExtendedBBList(class BasicBlock* bb) { return false; // Not iterative - return value will be ignored } -void MIRGraph::BasicBlockOptimization() { +void MIRGraph::BasicBlockOptimizationStart() { if ((cu_->disable_opt & (1 << kLocalValueNumbering)) == 0) { temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack)); temp_.gvn.ifield_ids_ = @@ -1589,7 +1556,9 @@ void MIRGraph::BasicBlockOptimization() { temp_.gvn.sfield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), sfield_lowering_infos_); } +} +void MIRGraph::BasicBlockOptimization() { if ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) { ClearAllVisitedFlags(); PreOrderDfsIterator iter2(this); @@ -1606,7 +1575,9 @@ void MIRGraph::BasicBlockOptimization() { BasicBlockOpt(bb); } } +} +void MIRGraph::BasicBlockOptimizationEnd() { // Clean up after LVN. temp_.gvn.ifield_ids_ = nullptr; temp_.gvn.sfield_ids_ = nullptr; @@ -1719,32 +1690,37 @@ bool MIRGraph::CanThrow(MIR* mir) { const int opt_flags = mir->optimization_flags; uint64_t df_attributes = GetDataFlowAttributes(mir); + // First, check if the insn can still throw NPE. if (((df_attributes & DF_HAS_NULL_CHKS) != 0) && ((opt_flags & MIR_IGNORE_NULL_CHECK) == 0)) { return true; } + + // Now process specific instructions. if ((df_attributes & DF_IFIELD) != 0) { - // The IGET/IPUT family. + // The IGET/IPUT family. We have processed the IGET/IPUT null check above. + DCHECK_NE(opt_flags & MIR_IGNORE_NULL_CHECK, 0); + // If not fast, weird things can happen and the insn can throw. const MirIFieldLoweringInfo& field_info = GetIFieldLoweringInfo(mir); - bool fast = (df_attributes & DF_DA) ? field_info.FastGet() : field_info.FastPut(); - // Already processed null check above. - if (fast) { - return false; - } - } else if ((df_attributes & DF_HAS_RANGE_CHKS) != 0) { - // The AGET/APUT family. 
- // Already processed null check above. - if ((opt_flags & MIR_IGNORE_RANGE_CHECK) != 0) { - return false; - } + bool fast = (df_attributes & DF_DA) != 0 ? field_info.FastGet() : field_info.FastPut(); + return !fast; } else if ((df_attributes & DF_SFIELD) != 0) { - // The SGET/SPUT family. + // The SGET/SPUT family. Check for potentially throwing class initialization. + // Also, if not fast, weird things can happen and the insn can throw. const MirSFieldLoweringInfo& field_info = GetSFieldLoweringInfo(mir); - bool fast = (df_attributes & DF_DA) ? field_info.FastGet() : field_info.FastPut(); + bool fast = (df_attributes & DF_DA) != 0 ? field_info.FastGet() : field_info.FastPut(); bool is_class_initialized = field_info.IsClassInitialized() || ((mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) != 0); - if (fast && is_class_initialized) { - return false; - } + return !(fast && is_class_initialized); + } else if ((df_attributes & DF_HAS_RANGE_CHKS) != 0) { + // Only AGET/APUT have range checks. We have processed the AGET/APUT null check above. + DCHECK_NE(opt_flags & MIR_IGNORE_NULL_CHECK, 0); + // Non-throwing only if range check has been eliminated. + return ((opt_flags & MIR_IGNORE_RANGE_CHECK) == 0); + } else if (mir->dalvikInsn.opcode == Instruction::ARRAY_LENGTH || + mir->dalvikInsn.opcode == Instruction::FILL_ARRAY_DATA || + static_cast<int>(mir->dalvikInsn.opcode) == kMirOpNullCheck) { + // No more checks for these (null check was processed above). + return false; } return true; } diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index 6c2e9c0b27..362c7fdc05 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -129,7 +129,6 @@ class MirOptimizationTest : public testing::Test { cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo)); } } - cu_.mir_graph->num_blocks_ = count; ASSERT_EQ(count, cu_.mir_graph->block_list_.size()); cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1]; ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type); diff --git a/compiler/dex/pass_driver_me_post_opt.cc b/compiler/dex/pass_driver_me_post_opt.cc index e6238e9f25..9b56c0da87 100644 --- a/compiler/dex/pass_driver_me_post_opt.cc +++ b/compiler/dex/pass_driver_me_post_opt.cc @@ -31,20 +31,19 @@ namespace art { // The initial list of passes to be used by the PassDriveMEPostOpt. 
template<> const Pass* const PassDriver<PassDriverMEPostOpt>::g_passes[] = { - GetPassInstance<InitializeData>(), - GetPassInstance<ClearPhiInstructions>(), - GetPassInstance<DFSOrders>(), - GetPassInstance<BuildDomination>(), - GetPassInstance<TopologicalSortOrders>(), - GetPassInstance<DefBlockMatrix>(), - GetPassInstance<CreatePhiNodes>(), - GetPassInstance<ClearVisitedFlag>(), - GetPassInstance<SSAConversion>(), - GetPassInstance<PhiNodeOperands>(), - GetPassInstance<ConstantPropagation>(), - GetPassInstance<PerformInitRegLocations>(), - GetPassInstance<MethodUseCount>(), - GetPassInstance<FreeData>(), + GetPassInstance<DFSOrders>(), + GetPassInstance<BuildDomination>(), + GetPassInstance<TopologicalSortOrders>(), + GetPassInstance<InitializeSSATransformation>(), + GetPassInstance<ClearPhiInstructions>(), + GetPassInstance<DefBlockMatrix>(), + GetPassInstance<CreatePhiNodes>(), + GetPassInstance<SSAConversion>(), + GetPassInstance<PhiNodeOperands>(), + GetPassInstance<ConstantPropagation>(), + GetPassInstance<PerformInitRegLocations>(), + GetPassInstance<MethodUseCount>(), + GetPassInstance<FinishSSATransformation>(), }; // The number of the passes in the initial list of Passes (g_passes). diff --git a/compiler/dex/post_opt_passes.h b/compiler/dex/post_opt_passes.h index 7b84ba88c5..964355bb5d 100644 --- a/compiler/dex/post_opt_passes.h +++ b/compiler/dex/post_opt_passes.h @@ -24,13 +24,31 @@ namespace art { /** - * @class InitializeData + * @class PassMEMirSsaRep + * @brief Convenience class for passes that check MIRGraph::MirSsaRepUpToDate(). + */ +class PassMEMirSsaRep : public PassME { + public: + PassMEMirSsaRep(const char* name, DataFlowAnalysisMode type = kAllNodes) + : PassME(name, type) { + } + + bool Gate(const PassDataHolder* data) const OVERRIDE { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + return !c_unit->mir_graph->MirSsaRepUpToDate(); + } +}; + +/** + * @class InitializeSSATransformation * @brief There is some data that needs to be initialized before performing * the post optimization passes. */ -class InitializeData : public PassME { +class InitializeSSATransformation : public PassMEMirSsaRep { public: - InitializeData() : PassME("InitializeData", kNoNodes) { + InitializeSSATransformation() : PassMEMirSsaRep("InitializeSSATransformation", kNoNodes) { } void Start(PassDataHolder* data) const { @@ -39,8 +57,8 @@ class InitializeData : public PassME { DCHECK(data != nullptr); CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); - c_unit->mir_graph.get()->InitializeBasicBlockData(); - c_unit->mir_graph.get()->SSATransformationStart(); + c_unit->mir_graph->SSATransformationStart(); + c_unit->mir_graph->CompilerInitializeSSAConversion(); } }; @@ -62,9 +80,9 @@ class MethodUseCount : public PassME { * @class ClearPhiInformation * @brief Clear the PHI nodes from the CFG. 
*/ -class ClearPhiInstructions : public PassME { +class ClearPhiInstructions : public PassMEMirSsaRep { public: - ClearPhiInstructions() : PassME("ClearPhiInstructions") { + ClearPhiInstructions() : PassMEMirSsaRep("ClearPhiInstructions") { } bool Worker(PassDataHolder* data) const; @@ -115,12 +133,18 @@ class BuildDomination : public PassME { BuildDomination() : PassME("BuildDomination", kNoNodes) { } + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + return !c_unit->mir_graph->DominationUpToDate(); + } + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); - c_unit->mir_graph.get()->ComputeDominators(); - c_unit->mir_graph.get()->CompilerInitializeSSAConversion(); + c_unit->mir_graph->ComputeDominators(); } void End(PassDataHolder* data) const { @@ -143,6 +167,13 @@ class TopologicalSortOrders : public PassME { TopologicalSortOrders() : PassME("TopologicalSortOrders", kNoNodes) { } + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + return !c_unit->mir_graph->TopologicalOrderUpToDate(); + } + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; @@ -155,9 +186,9 @@ class TopologicalSortOrders : public PassME { * @class DefBlockMatrix * @brief Calculate the matrix of definition per basic block */ -class DefBlockMatrix : public PassME { +class DefBlockMatrix : public PassMEMirSsaRep { public: - DefBlockMatrix() : PassME("DefBlockMatrix", kNoNodes) { + DefBlockMatrix() : PassMEMirSsaRep("DefBlockMatrix", kNoNodes) { } void Start(PassDataHolder* data) const { @@ -172,9 +203,9 @@ class DefBlockMatrix : public PassME { * @class CreatePhiNodes * @brief Pass to create the phi nodes after SSA calculation */ -class CreatePhiNodes : public PassME { +class CreatePhiNodes : public PassMEMirSsaRep { public: - CreatePhiNodes() : PassME("CreatePhiNodes", kNoNodes) { + CreatePhiNodes() : PassMEMirSsaRep("CreatePhiNodes", kNoNodes) { } void Start(PassDataHolder* data) const { @@ -186,30 +217,12 @@ class CreatePhiNodes : public PassME { }; /** - * @class ClearVisitedFlag - * @brief Pass to clear the visited flag for all basic blocks. 
- */ - -class ClearVisitedFlag : public PassME { - public: - ClearVisitedFlag() : PassME("ClearVisitedFlag", kNoNodes) { - } - - void Start(PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; - DCHECK(c_unit != nullptr); - c_unit->mir_graph.get()->ClearAllVisitedFlags(); - } -}; - -/** * @class SSAConversion * @brief Pass for SSA conversion of MIRs */ -class SSAConversion : public PassME { +class SSAConversion : public PassMEMirSsaRep { public: - SSAConversion() : PassME("SSAConversion", kNoNodes) { + SSAConversion() : PassMEMirSsaRep("SSAConversion", kNoNodes) { } void Start(PassDataHolder* data) const { @@ -217,6 +230,7 @@ class SSAConversion : public PassME { CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); MIRGraph *mir_graph = c_unit->mir_graph.get(); + mir_graph->ClearAllVisitedFlags(); mir_graph->DoDFSPreOrderSSARename(mir_graph->GetEntryBlock()); } }; @@ -225,9 +239,9 @@ class SSAConversion : public PassME { * @class PhiNodeOperands * @brief Pass to insert the Phi node operands to basic blocks */ -class PhiNodeOperands : public PassME { +class PhiNodeOperands : public PassMEMirSsaRep { public: - PhiNodeOperands() : PassME("PhiNodeOperands", kPreOrderDFSTraversal) { + PhiNodeOperands() : PassMEMirSsaRep("PhiNodeOperands", kPreOrderDFSTraversal) { } bool Worker(PassDataHolder* data) const { @@ -246,9 +260,9 @@ class PhiNodeOperands : public PassME { * @class InitRegLocations * @brief Initialize Register Locations. */ -class PerformInitRegLocations : public PassME { +class PerformInitRegLocations : public PassMEMirSsaRep { public: - PerformInitRegLocations() : PassME("PerformInitRegLocation", kNoNodes) { + PerformInitRegLocations() : PassMEMirSsaRep("PerformInitRegLocation", kNoNodes) { } void Start(PassDataHolder* data) const { @@ -263,9 +277,9 @@ class PerformInitRegLocations : public PassME { * @class ConstantPropagation * @brief Perform a constant propagation pass. */ -class ConstantPropagation : public PassME { +class ConstantPropagation : public PassMEMirSsaRep { public: - ConstantPropagation() : PassME("ConstantPropagation") { + ConstantPropagation() : PassMEMirSsaRep("ConstantPropagation") { } bool Worker(PassDataHolder* data) const { @@ -288,12 +302,12 @@ class ConstantPropagation : public PassME { }; /** - * @class FreeData + * @class FinishSSATransformation * @brief There is some data that needs to be freed after performing the post optimization passes. 
*/ -class FreeData : public PassME { +class FinishSSATransformation : public PassMEMirSsaRep { public: - FreeData() : PassME("FreeData", kNoNodes) { + FinishSSATransformation() : PassMEMirSsaRep("FinishSSATransformation", kNoNodes) { } void End(PassDataHolder* data) const { diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index d3743531fb..b05939156f 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -749,6 +749,7 @@ void ArmMir2Lir::FreeCallTemps() { FreeTemp(rs_r1); FreeTemp(rs_r2); FreeTemp(rs_r3); + FreeTemp(TargetReg(kHiddenArg)); if (!kArm32QuickCodeUseSoftFloat) { FreeTemp(rs_fr0); FreeTemp(rs_fr1); diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 030c5ed2f4..ee7e818f85 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -759,6 +759,7 @@ void Arm64Mir2Lir::FreeCallTemps() { FreeTemp(rs_f5); FreeTemp(rs_f6); FreeTemp(rs_f7); + FreeTemp(TargetReg(kHiddenArg)); } RegStorage Arm64Mir2Lir::LoadHelper(QuickEntrypointEnum trampoline) { diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 9462d3d08f..eb206a68c6 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -1557,7 +1557,7 @@ void Mir2Lir::GenInvokeNoInline(CallInfo* info) { LIR* call_insn = GenCallInsn(method_info); MarkSafepointPC(call_insn); - ClobberCallerSave(); + FreeCallTemps(); if (info->result.location != kLocInvalid) { // We have a following MOVE_RESULT - do it now. if (info->result.wide) { diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index efa130c65d..c22ba04e08 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -445,6 +445,7 @@ void MipsMir2Lir::FreeCallTemps() { FreeTemp(rs_rMIPS_ARG1); FreeTemp(rs_rMIPS_ARG2); FreeTemp(rs_rMIPS_ARG3); + FreeTemp(TargetReg(kHiddenArg)); } bool MipsMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind ATTRIBUTE_UNUSED) { diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 5f6cdda0d3..97732e2c12 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -562,6 +562,7 @@ void X86Mir2Lir::FreeCallTemps() { FreeTemp(TargetReg32(kArg1)); FreeTemp(TargetReg32(kArg2)); FreeTemp(TargetReg32(kArg3)); + FreeTemp(TargetReg32(kHiddenArg)); if (cu_->target64) { FreeTemp(TargetReg32(kArg4)); FreeTemp(TargetReg32(kArg5)); @@ -2209,18 +2210,36 @@ void X86Mir2Lir::GenReduceVector(MIR* mir) { // Handle float case. // TODO Add support for fast math (not value safe) and do horizontal add in that case. + int extract_index = mir->dalvikInsn.arg[0]; + rl_result = EvalLoc(rl_dest, kFPReg, true); NewLIR2(kX86PxorRR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); - NewLIR2(kX86AddssRR, rl_result.reg.GetReg(), vector_src.GetReg()); - // Since FP must keep order of operation for value safety, we shift to low - // 32-bits and add to result. - for (int i = 0; i < 3; i++) { - NewLIR3(kX86ShufpsRRI, vector_src.GetReg(), vector_src.GetReg(), 0x39); + if (LIKELY(extract_index != 0)) { + // We know the index of element which we want to extract. We want to extract it and + // keep values in vector register correct for future use. So the way we act is: + // 1. Generate shuffle mask that allows to swap zeroth and required elements; + // 2. Shuffle vector register with this mask; + // 3. 
Extract zeroth element where required value lies; + // 4. Shuffle with same mask again to restore original values in vector register. + // The mask is generated from equivalence mask 0b11100100 swapping 0th and extracted + // element indices. + int shuffle[4] = {0b00, 0b01, 0b10, 0b11}; + shuffle[0] = extract_index; + shuffle[extract_index] = 0; + int mask = 0; + for (int i = 0; i < 4; i++) { + mask |= (shuffle[i] << (2 * i)); + } + NewLIR3(kX86ShufpsRRI, vector_src.GetReg(), vector_src.GetReg(), mask); + NewLIR2(kX86AddssRR, rl_result.reg.GetReg(), vector_src.GetReg()); + NewLIR3(kX86ShufpsRRI, vector_src.GetReg(), vector_src.GetReg(), mask); + } else { + // We need to extract zeroth element and don't need any complex stuff to do it. NewLIR2(kX86AddssRR, rl_result.reg.GetReg(), vector_src.GetReg()); } - StoreValue(rl_dest, rl_result); + StoreFinalValue(rl_dest, rl_result); } else if (opsize == kDouble) { // TODO Handle double case. LOG(FATAL) << "Unsupported add reduce for double."; diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc index 7cd431e26c..3905649ac6 100644 --- a/compiler/dex/ssa_transformation.cc +++ b/compiler/dex/ssa_transformation.cc @@ -103,7 +103,7 @@ void MIRGraph::ComputeDFSOrders() { num_reachable_blocks_ = dfs_order_.size(); - if (num_reachable_blocks_ != num_blocks_) { + if (num_reachable_blocks_ != GetNumBlocks()) { // Kill all unreachable blocks. AllNodesIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { @@ -173,9 +173,9 @@ void MIRGraph::ComputeDomPostOrderTraversal(BasicBlock* bb) { dom_post_order_traversal_.reserve(num_reachable_blocks_); ClearAllVisitedFlags(); - DCHECK(temp_scoped_alloc_.get() != nullptr); + ScopedArenaAllocator allocator(&cu_->arena_stack); ScopedArenaVector<std::pair<BasicBlock*, ArenaBitVector::IndexIterator>> work_stack( - temp_scoped_alloc_->Adapter()); + allocator.Adapter()); bb->visited = true; work_stack.push_back(std::make_pair(bb, bb->i_dominated->Indexes().begin())); while (!work_stack.empty()) { @@ -402,6 +402,8 @@ void MIRGraph::ComputeDominators() { for (BasicBlock* bb = iter5.Next(); bb != NULL; bb = iter5.Next()) { ComputeDominanceFrontier(bb); } + + domination_up_to_date_ = true; } /* diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index f9054e0133..dde0dfe394 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -670,10 +670,13 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction, MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedField); return false; } + +#if defined(__aarch64__) if (resolved_field->IsVolatile()) { MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile); return false; } +#endif Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType(); @@ -689,12 +692,14 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction, null_check, value, field_type, - resolved_field->GetOffset())); + resolved_field->GetOffset(), + resolved_field->IsVolatile())); } else { current_block_->AddInstruction(new (arena_) HInstanceFieldGet( current_block_->GetLastInstruction(), field_type, - resolved_field->GetOffset())); + resolved_field->GetOffset(), + resolved_field->IsVolatile())); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } @@ -723,10 +728,12 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, return false; } +#if defined(__aarch64__) if (resolved_field->IsVolatile()) { 
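Aside on the GenReduceVector change above: the SHUFPS immediate built there encodes one source lane per 2-bit field, so swapping entries 0 and extract_index of the identity selection {0, 1, 2, 3} yields a mask that exchanges those two lanes, and applying the same mask twice restores the register (the swap is an involution). A minimal standalone sketch of that mask computation, assuming the helper name BuildSwapMask and the sample indices are purely illustrative and not part of the patch:

#include <cassert>
#include <cstdint>

// Each 2-bit field i of the SHUFPS immediate selects the source lane copied into lane i.
uint8_t BuildSwapMask(int extract_index) {
  int lanes[4] = {0, 1, 2, 3};   // Identity selection, 0b11100100.
  lanes[0] = extract_index;      // Lane 0 receives the element to extract.
  lanes[extract_index] = 0;      // That element's slot receives lane 0.
  uint8_t mask = 0;
  for (int i = 0; i < 4; ++i) {
    mask |= lanes[i] << (2 * i);
  }
  return mask;
}

int main() {
  assert(BuildSwapMask(0) == 0xE4);  // Identity: nothing to swap.
  assert(BuildSwapMask(2) == 0xC6);  // Lanes 0 and 2 exchanged.
  return 0;
}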
MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile); return false; } +#endif Handle<mirror::Class> referrer_class(hs.NewHandle(compiler_driver_->ResolveCompilingMethodsClass( soa, dex_cache, class_loader, outer_compilation_unit_))); @@ -763,10 +770,12 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, HInstruction* value = LoadLocal(source_or_dest_reg, field_type); DCHECK_EQ(value->GetType(), field_type); current_block_->AddInstruction( - new (arena_) HStaticFieldSet(cls, value, field_type, resolved_field->GetOffset())); + new (arena_) HStaticFieldSet(cls, value, field_type, resolved_field->GetOffset(), + resolved_field->IsVolatile())); } else { current_block_->AddInstruction( - new (arena_) HStaticFieldGet(cls, field_type, resolved_field->GetOffset())); + new (arena_) HStaticFieldGet(cls, field_type, resolved_field->GetOffset(), + resolved_field->IsVolatile())); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } return true; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 002d9d4449..063dc7cafb 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -2556,68 +2556,170 @@ void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unreachable"; } -void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) { + // TODO (ported from quick): revisit Arm barrier kinds + DmbOptions flavour = DmbOptions::ISH; // quiet c++ warnings + switch (kind) { + case MemBarrierKind::kAnyStore: + case MemBarrierKind::kLoadAny: + case MemBarrierKind::kAnyAny: { + flavour = DmbOptions::ISH; + break; + } + case MemBarrierKind::kStoreStore: { + flavour = DmbOptions::ISHST; + break; + } + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + } + __ dmb(flavour); +} + +void InstructionCodeGeneratorARM::GenerateWideAtomicLoad(Register addr, + uint32_t offset, + Register out_lo, + Register out_hi) { + if (offset != 0) { + __ LoadImmediate(out_lo, offset); + __ add(addr, addr, ShifterOperand(out_lo)); + } + __ ldrexd(out_lo, out_hi, addr); +} + +void InstructionCodeGeneratorARM::GenerateWideAtomicStore(Register addr, + uint32_t offset, + Register value_lo, + Register value_hi, + Register temp1, + Register temp2) { + Label fail; + if (offset != 0) { + __ LoadImmediate(temp1, offset); + __ add(addr, addr, ShifterOperand(temp1)); + } + __ Bind(&fail); + // We need a load followed by store. (The address used in a STREX instruction must + // be the same as the address in the most recently executed LDREX instruction.) 
+ __ ldrexd(temp1, temp2, addr); + __ strexd(temp1, value_lo, value_hi, addr); + __ cmp(temp1, ShifterOperand(0)); + __ b(&fail, NE); +} + +void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(instruction->GetFieldType(), instruction->GetValue()); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); + + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble; + // Temporary registers for the write barrier. - if (needs_write_barrier) { + // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark. + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } else if (is_volatile && is_wide) { + // Arm encoding have some additional constraints for ldrexd/strexd: + // - registers need to be consecutive + // - the first register should be even but not R14. + // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever + // enable Arm encoding. + DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); + locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + if (field_type == Primitive::kPrimDouble) { + // For doubles we need two more registers to copy the value. 
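For reference, the LDREXD/STREXD retry loop emitted by GenerateWideAtomicStore above has the same semantics as a lock-free 64-bit std::atomic store on ARMv7 without LPAE: compilers typically lower such a store to exactly this kind of exclusive-monitor loop, retrying whenever the store-exclusive reports failure. A rough C++-level sketch of the equivalent operations (function names are illustrative, not part of the runtime):

#include <atomic>
#include <cstdint>

// On ARMv7 (no LPAE), a 64-bit atomic store is implemented with an LDREXD to claim the
// exclusive monitor followed by a STREXD; the STREXD fails if the monitor was lost in
// between, and the sequence is retried until it succeeds.
void StoreWideAtomic(std::atomic<uint64_t>* addr, uint64_t value) {
  addr->store(value, std::memory_order_relaxed);  // Typically lowers to an ldrexd/strexd loop.
}

// A 64-bit atomic load needs only the LDREXD half, since that load is already a
// single-copy atomic access.
uint64_t LoadWideAtomic(const std::atomic<uint64_t>* addr) {
  return addr->load(std::memory_order_relaxed);
}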
+ locations->AddTemp(Location::RegisterLocation(R2)); + locations->AddTemp(Location::RegisterLocation(R3)); + } } } -void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location value = locations->InAt(1); + + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreByte, value, obj, offset); + __ StoreToOffset(kStoreByte, value.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreHalfword, value, obj, offset); + __ StoreToOffset(kStoreHalfword, value.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreWord, value, obj, offset); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { + Register value_reg = value.AsRegister<Register>(); + __ StoreToOffset(kStoreWord, value_reg, base, offset); + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { Register temp = locations->GetTemp(0).AsRegister<Register>(); Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, obj, value); + codegen_->MarkGCCard(temp, card, base, value_reg); } break; } case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), obj, offset); + if (is_volatile) { + // TODO: We could use ldrd and strd that are atomic with Large Physical Address Extension + // support. This info is stored in the compiler driver (HasAtomicLdrdAndStrd) and we should + // pass it around to be able to optimize. 
+ GenerateWideAtomicStore(base, offset, + value.AsRegisterPairLow<Register>(), + value.AsRegisterPairHigh<Register>(), + locations->GetTemp(0).AsRegister<Register>(), + locations->GetTemp(1).AsRegister<Register>()); + } else { + __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), base, offset); + } break; } case Primitive::kPrimFloat: { - SRegister value = locations->InAt(1).AsFpuRegister<SRegister>(); - __ StoreSToOffset(value, obj, offset); + __ StoreSToOffset(value.AsFpuRegister<SRegister>(), base, offset); break; } case Primitive::kPrimDouble: { - DRegister value = FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>()); - __ StoreDToOffset(value, obj, offset); + DRegister value_reg = FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()); + if (is_volatile) { + Register value_reg_lo = locations->GetTemp(0).AsRegister<Register>(); + Register value_reg_hi = locations->GetTemp(1).AsRegister<Register>(); + + __ vmovrrd(value_reg_lo, value_reg_hi, value_reg); + + GenerateWideAtomicStore(base, offset, + value_reg_lo, + value_reg_hi, + locations->GetTemp(2).AsRegister<Register>(), + locations->GetTemp(3).AsRegister<Register>()); + } else { + __ StoreDToOffset(value_reg, base, offset); + } break; } @@ -2625,75 +2727,138 @@ void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instr LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } } -void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimDouble)) { + // Arm encoding have some additional constraints for ldrexd/strexd: + // - registers need to be consecutive + // - the first register should be even but not R14. + // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever + // enable Arm encoding. 
+ DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } } -void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location out = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (instruction->GetType()) { + switch (field_type) { case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedByte, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedByte, out, obj, offset); + __ LoadFromOffset(kLoadSignedByte, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadSignedHalfword, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedHalfword, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadWord, out, obj, offset); + __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimLong: { - // TODO: support volatile. 
- Location out = locations->Out(); - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset); + if (is_volatile) { + GenerateWideAtomicLoad(base, offset, + out.AsRegisterPairLow<Register>(), + out.AsRegisterPairHigh<Register>()); + } else { + __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), base, offset); + } break; } case Primitive::kPrimFloat: { - SRegister out = locations->Out().AsFpuRegister<SRegister>(); - __ LoadSFromOffset(out, obj, offset); + __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), base, offset); break; } case Primitive::kPrimDouble: { - DRegister out = FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()); - __ LoadDFromOffset(out, obj, offset); + DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()); + if (is_volatile) { + Register lo = locations->GetTemp(0).AsRegister<Register>(); + Register hi = locations->GetTemp(1).AsRegister<Register>(); + GenerateWideAtomicLoad(base, offset, lo, hi); + __ vmovdrr(out_reg, lo, hi); + } else { + __ LoadDFromOffset(out_reg, base, offset); + } break; } case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } +} + +void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) { @@ -3206,146 +3371,6 @@ void InstructionCodeGeneratorARM::GenerateClassInitializationCheck( __ Bind(slow_path->GetExitLabel()); } -void LocationsBuilderARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedByte, out, cls, offset); - break; - } - - case 
Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedByte, out, cls, offset); - break; - } - - case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedHalfword, out, cls, offset); - break; - } - - case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedHalfword, out, cls, offset); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadWord, out, cls, offset); - break; - } - - case Primitive::kPrimLong: { - // TODO: support volatile. - Location out = locations->Out(); - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), cls, offset); - break; - } - - case Primitive::kPrimFloat: { - SRegister out = locations->Out().AsFpuRegister<SRegister>(); - __ LoadSFromOffset(out, cls, offset); - break; - } - - case Primitive::kPrimDouble: { - DRegister out = FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()); - __ LoadDFromOffset(out, cls, offset); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(instruction->GetFieldType(), instruction->GetValue()); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Temporary registers for the write barrier. - if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); - - switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreByte, value, cls, offset); - break; - } - - case Primitive::kPrimShort: - case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreHalfword, value, cls, offset); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreWord, value, cls, offset); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, cls, value); - } - break; - } - - case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), cls, offset); - break; - } - - case Primitive::kPrimFloat: { - SRegister value = locations->InAt(1).AsFpuRegister<SRegister>(); - __ StoreSToOffset(value, cls, offset); - break; - } - - case Primitive::kPrimDouble: { - DRegister value = 
FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>()); - __ StoreDToOffset(value, cls, offset); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; - UNREACHABLE(); - } -} - void LocationsBuilderARM::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 226e635d05..b86670d939 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_ #include "code_generator.h" +#include "dex/compiler_enums.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm/assembler_thumb2.h" @@ -110,6 +111,8 @@ class LocationsBuilderARM : public HGraphVisitor { void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); CodeGeneratorARM* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -138,6 +141,15 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeARM* slow_path, Register class_reg); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleShift(HBinaryOperation* operation); + void GenerateMemoryBarrier(MemBarrierKind kind); + void GenerateWideAtomicStore(Register addr, uint32_t offset, + Register value_lo, Register value_hi, + Register temp1, Register temp2); + void GenerateWideAtomicLoad(Register addr, uint32_t offset, + Register out_lo, Register out_hi); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + ArmAssembler* const assembler_; CodeGeneratorARM* const codegen_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index e7edd8a805..ddb0e82ec0 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -2656,82 +2656,115 @@ void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unreachable"; } -void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { + /* + * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence. + * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model. + * For those cases, all we need to ensure is that there is a scheduling barrier in place. 
+ */ + switch (kind) { + case MemBarrierKind::kAnyAny: { + __ mfence(); + break; + } + case MemBarrierKind::kAnyStore: + case MemBarrierKind::kLoadAny: + case MemBarrierKind::kStoreStore: { + // nop + break; + } + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + } +} + + +void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) { + Label is_null; + __ testl(value, value); + __ j(kEqual, &is_null); + __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value())); + __ movl(temp, object); + __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift)); + __ movb(Address(temp, card, TIMES_1, 0), + X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); + __ Bind(&is_null); +} + +void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - bool is_byte_type = (field_type == Primitive::kPrimBoolean) - || (field_type == Primitive::kPrimByte); - // The register allocator does not support multiple - // inputs that die at entry with one in a specific register. - if (is_byte_type) { - // Ensure the value is in a byte register. - locations->SetInAt(1, Location::RegisterLocation(EAX)); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } - // Temporary registers for the write barrier. - if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); - // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); + if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) { + // Long values can be loaded atomically into an XMM using movsd. + // So we use an XMM register as a temp to achieve atomicity (first load the temp into the XMM + // and then copy the XMM into the output 32bits at a time). 
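As an illustration of the comment above (a sketch only, not the code the generator emits): an aligned 8-byte SSE access on x86 is a single memory operation, so routing a long through an XMM register avoids a torn read even on 32-bit x86. The intrinsic-level equivalent of movsd + movd + psrlq + movd looks roughly like the following; the helper name and the 8-byte alignment of src are assumptions of this illustration, not ART code.

    #include <emmintrin.h>  // SSE2 intrinsics
    #include <cstdint>

    // Load a 64-bit value with one 8-byte SSE access, then split it into two
    // 32-bit halves for a 32-bit register pair.
    static inline void AtomicLoad64ViaXmm(const int64_t* src, uint32_t* lo, uint32_t* hi) {
      __m128i tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src));  // single 64-bit load (movsd/movq)
      *lo = static_cast<uint32_t>(_mm_cvtsi128_si32(tmp));                   // movd: low half
      tmp = _mm_srli_epi64(tmp, 32);                                         // psrlq $32
      *hi = static_cast<uint32_t>(_mm_cvtsi128_si32(tmp));                   // movd: high half
    }

The generated code further down follows the same shape, with the XMM temp reserved here in the LocationSummary.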
+ locations->AddTemp(Location::RequiresFpuRegister()); } } -void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location out = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (field_type) { - case Primitive::kPrimBoolean: + case Primitive::kPrimBoolean: { + __ movzxb(out.AsRegister<Register>(), Address(base, offset)); + break; + } + case Primitive::kPrimByte: { - ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>(); - __ movb(Address(obj, offset), value); + __ movsxb(out.AsRegister<Register>(), Address(base, offset)); + break; + } + + case Primitive::kPrimShort: { + __ movsxw(out.AsRegister<Register>(), Address(base, offset)); break; } - case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movw(Address(obj, offset), value); + __ movzxw(out.AsRegister<Register>(), Address(base, offset)); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movl(Address(obj, offset), value); - - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, obj, value); - } + __ movl(out.AsRegister<Register>(), Address(base, offset)); break; } case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ movl(Address(obj, offset), value.AsRegisterPairLow<Register>()); - __ movl(Address(obj, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); + if (is_volatile) { + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movsd(temp, Address(base, offset)); + __ movd(out.AsRegisterPairLow<Register>(), temp); + __ psrlq(temp, Immediate(32)); + __ movd(out.AsRegisterPairHigh<Register>(), temp); + } else { + __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset)); + __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset)); + } break; } case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(obj, offset), value); + __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(obj, offset), value); + __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } @@ -2739,87 +2772,152 @@ void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instr LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } -} -void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) { - Label is_null; - __ testl(value, value); - __ j(kEqual, &is_null); - __ fs()->movl(card, 
Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value())); - __ movl(temp, object); - __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift)); - __ movb(Address(temp, card, TIMES_1, 0), - X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); - __ Bind(&is_null); + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } -void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + bool is_byte_type = (field_type == Primitive::kPrimBoolean) + || (field_type == Primitive::kPrimByte); + + // The register allocator does not support multiple + // inputs that die at entry with one in a specific register. + if (is_byte_type) { + // Ensure the value is in a byte register. + locations->SetInAt(1, Location::RegisterLocation(EAX)); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + // Temporary registers for the write barrier. + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + locations->AddTemp(Location::RequiresRegister()); + // Ensure the card is in a byte register. + locations->AddTemp(Location::RegisterLocation(ECX)); + } else if (is_volatile && (field_type == Primitive::kPrimLong)) { + // 64bits value can be atomically written to an address with movsd and an XMM register. + // We need two XMM registers because there's no easier way to (bit) copy a register pair + // into a single XMM register (we copy each pair part into the XMMs and then interleave them). + // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the + // isolated cases when we need this it isn't worth adding the extra complexity. 
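The store side interleaves the two 32-bit halves back into one XMM register so that the memory write is a single 8-byte access. A sketch of the same trick with SSE2 intrinsics (again an illustration, not ART code; the helper name and the 8-byte alignment of dst are assumed):

    #include <emmintrin.h>  // SSE2 intrinsics
    #include <cstdint>

    // Combine two 32-bit halves into one XMM register and write them back with
    // a single 8-byte store -- the intrinsic equivalent of movd + movd + punpckldq + movsd.
    static inline void AtomicStore64ViaXmm(int64_t* dst, uint32_t lo, uint32_t hi) {
      __m128i xlo = _mm_cvtsi32_si128(static_cast<int>(lo));   // movd: low half into an XMM
      __m128i xhi = _mm_cvtsi32_si128(static_cast<int>(hi));   // movd: high half into an XMM
      __m128i val = _mm_unpacklo_epi32(xlo, xhi);              // punpckldq: lo in bits 0-31, hi in bits 32-63
      _mm_storel_epi64(reinterpret_cast<__m128i*>(dst), val);  // single 64-bit store
    }

punpckldq places the destination's low dword in bits 0-31 and the source's low dword in bits 32-63, which matches the lo/hi layout of the register pair being stored.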
+ locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + } } -void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location value = locations->InAt(1); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxb(out, Address(obj, offset)); - break; - } + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } + switch (field_type) { + case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxb(out, Address(obj, offset)); - break; - } - - case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxw(out, Address(obj, offset)); + __ movb(Address(base, offset), value.AsRegister<ByteRegister>()); break; } + case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxw(out, Address(obj, offset)); + __ movw(Address(base, offset), value.AsRegister<Register>()); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ movl(out, Address(obj, offset)); + __ movl(Address(base, offset), value.AsRegister<Register>()); + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>()); + } break; } case Primitive::kPrimLong: { - // TODO: support volatile. 
- __ movl(locations->Out().AsRegisterPairLow<Register>(), Address(obj, offset)); - __ movl(locations->Out().AsRegisterPairHigh<Register>(), Address(obj, kX86WordSize + offset)); + if (is_volatile) { + XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + __ movd(temp1, value.AsRegisterPairLow<Register>()); + __ movd(temp2, value.AsRegisterPairHigh<Register>()); + __ punpckldq(temp1, temp2); + __ movsd(Address(base, offset), temp1); + } else { + __ movl(Address(base, offset), value.AsRegisterPairLow<Register>()); + __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); + } break; } case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(obj, offset)); + __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(obj, offset)); + __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } +} + +void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) { @@ -3383,159 +3481,6 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( // No need for memory fence, thanks to the X86 memory model. 
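Taken together, the volatile paths bracket the access with barriers: kAnyStore before a volatile store and kAnyAny after it, and kLoadAny after a volatile load; on x86 only the kAnyAny case costs an mfence. Roughly the same placement, expressed with C++11 fences purely as an analogy (this is not the code the compiler emits):

    #include <atomic>
    #include <cstdint>

    std::atomic<int32_t> g_field{0};  // stand-in for a volatile Java field

    void VolatileStyleStore(int32_t value) {
      std::atomic_thread_fence(std::memory_order_release);  // ~ kAnyStore before the store
      g_field.store(value, std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_seq_cst);  // ~ kAnyAny after the store
    }

    int32_t VolatileStyleLoad() {
      int32_t value = g_field.load(std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_acquire);  // ~ kLoadAny after the load
      return value;
    }

On x86 the release and acquire fences reduce to compiler-reordering barriers, while the seq_cst fence becomes mfence, mirroring the GenerateMemoryBarrier switch above.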
} -void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ movl(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimLong: { - // TODO: support volatile. - __ movl(locations->Out().AsRegisterPairLow<Register>(), Address(cls, offset)); - __ movl(locations->Out().AsRegisterPairHigh<Register>(), Address(cls, kX86WordSize + offset)); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); - bool is_byte_type = (field_type == Primitive::kPrimBoolean) - || (field_type == Primitive::kPrimByte); - // The register allocator does not support multiple - // inputs that die at entry with one in a specific register. - if (is_byte_type) { - // Ensure the value is in a byte register. - locations->SetInAt(1, Location::RegisterLocation(EAX)); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } - // Temporary registers for the write barrier. - if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); - // Ensure the card is in a byte register. 
- locations->AddTemp(Location::RegisterLocation(ECX)); - } -} - -void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); - - switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { - ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>(); - __ movb(Address(cls, offset), value); - break; - } - - case Primitive::kPrimShort: - case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movw(Address(cls, offset), value); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movl(Address(cls, offset), value); - - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, cls, value); - } - break; - } - - case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ movl(Address(cls, offset), value.AsRegisterPairLow<Register>()); - __ movl(Address(cls, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(cls, offset), value); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(cls, offset), value); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; - UNREACHABLE(); - } -} - void LocationsBuilderX86::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index aed06c04df..636f8845e5 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_ #include "code_generator.h" +#include "dex/compiler_enums.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/x86/assembler_x86.h" @@ -105,6 +106,8 @@ class LocationsBuilderX86 : public HGraphVisitor { void HandleBitwiseOperation(HBinaryOperation* instruction); void HandleInvoke(HInvoke* invoke); void HandleShift(HBinaryOperation* instruction); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); CodeGeneratorX86* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -137,6 +140,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); void GenerateUShrLong(const Location& loc, Register shifter); + void GenerateMemoryBarrier(MemBarrierKind kind); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; diff --git 
a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ff7fcdcbac..1bc3092d15 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -2389,69 +2389,87 @@ void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unimplemented"; } -void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { + /* + * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence. + * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model. + * For those cases, all we need to ensure is that there is a scheduling barrier in place. + */ + switch (kind) { + case MemBarrierKind::kAnyAny: { + __ mfence(); + break; + } + case MemBarrierKind::kAnyStore: + case MemBarrierKind::kLoadAny: + case MemBarrierKind::kStoreStore: { + // nop + break; + } + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + } +} + +void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue()); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } -void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); - Primitive::Type field_type = instruction->GetFieldType(); + CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); + Location out = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (field_type) { - case Primitive::kPrimBoolean: + case Primitive::kPrimBoolean: { + __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset)); + break; + } + case Primitive::kPrimByte: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movb(Address(obj, offset), value); + __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset)); + break; + } + + case Primitive::kPrimShort: { + __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } - case Primitive::kPrimShort: case Primitive::kPrimChar: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movw(Address(obj, offset), value); + __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - CpuRegister value =

locations->InAt(1).AsRegister<CpuRegister>(); - __ movl(Address(obj, offset), value); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); - codegen_->MarkGCCard(temp, card, obj, value); - } + __ movl(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } case Primitive::kPrimLong: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movq(Address(obj, offset), value); + __ movq(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(obj, offset), value); + __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(obj, offset), value); + __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } @@ -2459,74 +2477,124 @@ void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* in LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } -void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(field_info.GetFieldType(), instruction->InputAt(1)); + locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->SetInAt(1, Location::RequiresRegister()); + if (needs_write_barrier) { + // Temporary registers for the write barrier. 
+ locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } } -void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); + CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); + Location value = locations->InAt(1); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxb(out, Address(obj, offset)); - break; - } + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } + switch (field_type) { + case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxb(out, Address(obj, offset)); - break; - } - - case Primitive::kPrimShort: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxw(out, Address(obj, offset)); + __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); break; } + case Primitive::kPrimShort: case Primitive::kPrimChar: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxw(out, Address(obj, offset)); + __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movl(out, Address(obj, offset)); + __ movl(Address(base, offset), value.AsRegister<CpuRegister>()); + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); + codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>()); + } break; } case Primitive::kPrimLong: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movq(out, Address(obj, offset)); + __ movq(Address(base, offset), value.AsRegister<CpuRegister>()); break; } case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(obj, offset)); + __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(obj, offset)); + __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } +} + +void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void 
LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) { @@ -3222,146 +3290,6 @@ void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { check->GetLocations()->InAt(0).AsRegister<CpuRegister>()); } -void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - CpuRegister cls = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); - - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimByte: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimShort: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimChar: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movl(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimLong: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movq(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue()); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, 
Location::RequiresRegister()); - if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - CpuRegister cls = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); - Primitive::Type field_type = instruction->GetFieldType(); - - switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movb(Address(cls, offset), value); - break; - } - - case Primitive::kPrimShort: - case Primitive::kPrimChar: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movw(Address(cls, offset), value); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movl(Address(cls, offset), value); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); - codegen_->MarkGCCard(temp, card, cls, value); - } - break; - } - - case Primitive::kPrimLong: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movq(Address(cls, offset), value); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(cls, offset), value); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(cls, offset), value); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; - UNREACHABLE(); - } -} - void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 794b81ffbc..070886460b 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ #include "code_generator.h" +#include "dex/compiler_enums.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/x86_64/assembler_x86_64.h" @@ -109,6 +110,8 @@ class LocationsBuilderX86_64 : public HGraphVisitor { void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction); CodeGeneratorX86_64* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -138,6 +141,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void HandleBitwiseOperation(HBinaryOperation* operation); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleShift(HBinaryOperation* operation); + void GenerateMemoryBarrier(MemBarrierKind kind); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); X86_64Assembler* 
const assembler_; CodeGeneratorX86_64* const codegen_; diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index 94ff192264..48f1ea9e15 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -40,18 +40,22 @@ TEST(GVNTest, LocalFieldElimination) { entry->AddSuccessor(block); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(42), false)); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(42), false)); HInstruction* to_remove = block->GetLastInstruction(); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(43))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(43), false)); HInstruction* different_offset = block->GetLastInstruction(); // Kill the value. block->AddInstruction(new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42))); + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false)); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(42), false)); HInstruction* use_after_kill = block->GetLastInstruction(); block->AddInstruction(new (&allocator) HExit()); @@ -82,7 +86,8 @@ TEST(GVNTest, GlobalFieldElimination) { graph->AddBlock(block); entry->AddSuccessor(block); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); block->AddInstruction(new (&allocator) HIf(block->GetLastInstruction())); HBasicBlock* then = new (&allocator) HBasicBlock(graph); @@ -98,13 +103,16 @@ TEST(GVNTest, GlobalFieldElimination) { else_->AddSuccessor(join); then->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); then->AddInstruction(new (&allocator) HGoto()); else_->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); else_->AddInstruction(new (&allocator) HGoto()); join->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); join->AddInstruction(new (&allocator) HExit()); graph->TryBuildingSsa(); @@ -132,7 +140,8 @@ TEST(GVNTest, LoopFieldElimination) { graph->AddBlock(block); entry->AddSuccessor(block); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); block->AddInstruction(new (&allocator) HGoto()); HBasicBlock* loop_header = new (&allocator) HBasicBlock(graph); @@ -148,22 +157,25 @@ TEST(GVNTest, LoopFieldElimination) { loop_body->AddSuccessor(loop_header); loop_header->AddInstruction( - 
new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); HInstruction* field_get_in_loop_header = loop_header->GetLastInstruction(); loop_header->AddInstruction(new (&allocator) HIf(block->GetLastInstruction())); // Kill inside the loop body to prevent field gets inside the loop header // and the body to be GVN'ed. loop_body->AddInstruction(new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42))); + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false)); HInstruction* field_set = loop_body->GetLastInstruction(); loop_body->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); HInstruction* field_get_in_loop_body = loop_body->GetLastInstruction(); loop_body->AddInstruction(new (&allocator) HGoto()); exit->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); HInstruction* field_get_in_exit = exit->GetLastInstruction(); exit->AddInstruction(new (&allocator) HExit()); @@ -242,7 +254,7 @@ TEST(GVNTest, LoopSideEffects) { { // Make one block with a side effect. entry->AddInstruction(new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42))); + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false)); GlobalValueNumberer gvn(&allocator, graph); gvn.Run(); @@ -256,7 +268,7 @@ TEST(GVNTest, LoopSideEffects) { { outer_loop_body->InsertInstructionBefore( new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42)), + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false), outer_loop_body->GetLastInstruction()); GlobalValueNumberer gvn(&allocator, graph); @@ -273,7 +285,7 @@ TEST(GVNTest, LoopSideEffects) { outer_loop_body->RemoveInstruction(outer_loop_body->GetFirstInstruction()); inner_loop_body->InsertInstructionBefore( new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42)), + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false), inner_loop_body->GetLastInstruction()); GlobalValueNumberer gvn(&allocator, graph); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index c963b70492..0fc1fd8663 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -2128,39 +2128,45 @@ class HNullCheck : public HExpression<1> { class FieldInfo : public ValueObject { public: - FieldInfo(MemberOffset field_offset, Primitive::Type field_type) - : field_offset_(field_offset), field_type_(field_type) {} + FieldInfo(MemberOffset field_offset, Primitive::Type field_type, bool is_volatile) + : field_offset_(field_offset), field_type_(field_type), is_volatile_(is_volatile) {} MemberOffset GetFieldOffset() const { return field_offset_; } Primitive::Type GetFieldType() const { return field_type_; } + bool IsVolatile() const { return is_volatile_; } private: const MemberOffset field_offset_; const Primitive::Type field_type_; + const bool is_volatile_; }; class HInstanceFieldGet : public HExpression<1> { public: HInstanceFieldGet(HInstruction* value, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset 
field_offset, + bool is_volatile) : HExpression(field_type, SideEffects::DependsOnSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, value); } - virtual bool CanBeMoved() const { return true; } - virtual bool InstructionDataEquals(HInstruction* other) const { - size_t other_offset = other->AsInstanceFieldGet()->GetFieldOffset().SizeValue(); - return other_offset == GetFieldOffset().SizeValue(); + bool CanBeMoved() const OVERRIDE { return !IsVolatile(); } + + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + HInstanceFieldGet* other_get = other->AsInstanceFieldGet(); + return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } virtual size_t ComputeHashCode() const { return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } DECLARE_INSTRUCTION(InstanceFieldGet); @@ -2175,15 +2181,18 @@ class HInstanceFieldSet : public HTemplateInstruction<2> { HInstanceFieldSet(HInstruction* object, HInstruction* value, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : HTemplateInstruction(SideEffects::ChangesSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, object); SetRawInputAt(1, value); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } HInstruction* GetValue() const { return InputAt(1); } @@ -2496,24 +2505,29 @@ class HStaticFieldGet : public HExpression<1> { public: HStaticFieldGet(HInstruction* cls, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : HExpression(field_type, SideEffects::DependsOnSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, cls); } - bool CanBeMoved() const OVERRIDE { return true; } + + bool CanBeMoved() const OVERRIDE { return !IsVolatile(); } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { - size_t other_offset = other->AsStaticFieldGet()->GetFieldOffset().SizeValue(); - return other_offset == GetFieldOffset().SizeValue(); + HStaticFieldGet* other_get = other->AsStaticFieldGet(); + return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } size_t ComputeHashCode() const OVERRIDE { return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } DECLARE_INSTRUCTION(StaticFieldGet); @@ -2528,15 +2542,18 @@ class HStaticFieldSet : public HTemplateInstruction<2> { HStaticFieldSet(HInstruction* cls, HInstruction* value, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : 
HTemplateInstruction(SideEffects::ChangesSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, cls); SetRawInputAt(1, value); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } HInstruction* GetValue() const { return InputAt(1); } @@ -2677,7 +2694,7 @@ class HMonitorOperation : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(MonitorOperation); - protected: + private: const OperationKind kind_; const uint32_t dex_pc_; @@ -2685,7 +2702,6 @@ class HMonitorOperation : public HTemplateInstruction<1> { DISALLOW_COPY_AND_ASSIGN(HMonitorOperation); }; - class MoveOperands : public ArenaObject<kArenaAllocMisc> { public: MoveOperands(Location source, Location destination, HInstruction* instruction) diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index f677e840ef..c2ea80ec33 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -462,7 +462,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, entry->AddSuccessor(block); HInstruction* test = new (allocator) HInstanceFieldGet( - parameter, Primitive::kPrimBoolean, MemberOffset(22)); + parameter, Primitive::kPrimBoolean, MemberOffset(22), false); block->AddInstruction(test); block->AddInstruction(new (allocator) HIf(test)); HBasicBlock* then = new (allocator) HBasicBlock(graph); @@ -481,8 +481,10 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, *phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); join->AddPhi(*phi); - *input1 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42)); - *input2 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42)); + *input1 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, + MemberOffset(42), false); + *input2 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, + MemberOffset(42), false); then->AddInstruction(*input1); else_->AddInstruction(*input2); join->AddInstruction(new (allocator) HExit()); @@ -581,7 +583,8 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator, graph->AddBlock(block); entry->AddSuccessor(block); - *field = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42)); + *field = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, + MemberOffset(42), false); block->AddInstruction(*field); *ret = new (allocator) HReturn(*field); block->AddInstruction(*ret); diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index c86ec4b3d6..87b38133fb 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -429,6 +429,8 @@ class ArmAssembler : public Assembler { virtual void ldrex(Register rd, Register rn, Condition cond = AL) = 0; virtual void strex(Register rd, Register rt, Register rn, Condition cond = AL) = 0; + virtual void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) = 0; + virtual void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) = 0; // Miscellaneous instructions. 
virtual void clrex(Condition cond = AL) = 0; diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index 8f6d45ab53..8d1fb60725 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -778,6 +778,7 @@ void Arm32Assembler::EmitMulOp(Condition cond, int32_t opcode, Emit(encoding); } + void Arm32Assembler::ldrex(Register rt, Register rn, Condition cond) { CHECK_NE(rn, kNoRegister); CHECK_NE(rt, kNoRegister); @@ -793,6 +794,25 @@ void Arm32Assembler::ldrex(Register rt, Register rn, Condition cond) { } +void Arm32Assembler::ldrexd(Register rt, Register rt2, Register rn, Condition cond) { + CHECK_NE(rn, kNoRegister); + CHECK_NE(rt, kNoRegister); + CHECK_NE(rt2, kNoRegister); + CHECK_NE(rt, R14); + CHECK_EQ(0u, static_cast<uint32_t>(rt) % 2); + CHECK_EQ(static_cast<uint32_t>(rt) + 1, static_cast<uint32_t>(rt2)); + CHECK_NE(cond, kNoCondition); + + int32_t encoding = + (static_cast<uint32_t>(cond) << kConditionShift) | + B24 | B23 | B21 | B20 | + static_cast<uint32_t>(rn) << 16 | + static_cast<uint32_t>(rt) << 12 | + B11 | B10 | B9 | B8 | B7 | B4 | B3 | B2 | B1 | B0; + Emit(encoding); +} + + void Arm32Assembler::strex(Register rd, Register rt, Register rn, @@ -811,6 +831,28 @@ void Arm32Assembler::strex(Register rd, Emit(encoding); } +void Arm32Assembler::strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond) { + CHECK_NE(rd, kNoRegister); + CHECK_NE(rn, kNoRegister); + CHECK_NE(rt, kNoRegister); + CHECK_NE(rt2, kNoRegister); + CHECK_NE(rt, R14); + CHECK_NE(rd, rt); + CHECK_NE(rd, rt2); + CHECK_EQ(0u, static_cast<uint32_t>(rt) % 2); + CHECK_EQ(static_cast<uint32_t>(rt) + 1, static_cast<uint32_t>(rt2)); + CHECK_NE(cond, kNoCondition); + + int32_t encoding = + (static_cast<uint32_t>(cond) << kConditionShift) | + B24 | B23 | B21 | + static_cast<uint32_t>(rn) << 16 | + static_cast<uint32_t>(rd) << 12 | + B11 | B10 | B9 | B8 | B7 | B4 | + static_cast<uint32_t>(rt); + Emit(encoding); +} + void Arm32Assembler::clrex(Condition cond) { CHECK_EQ(cond, AL); // This cannot be conditional on ARM. diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 6c8d41587b..b922d66513 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -123,6 +123,8 @@ class Arm32Assembler FINAL : public ArmAssembler { void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE; void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE; + void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; + void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; // Miscellaneous instructions. 
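On ARMv7 there is no plain 64-bit store guaranteed to be atomic, so a wide atomic store is built from an ldrexd/strexd retry loop; the checks in the A32 encodings above reflect the architectural restrictions (an even/consecutive data register pair, and a status register distinct from the data and address registers). A user-level sketch of that loop with GCC/Clang inline assembly, assuming an ARM target (the helper name is an assumption of this illustration; barriers for volatile semantics are emitted separately, as the ARM code generator does via GenerateMemoryBarrier):

    #include <cstdint>

    // The exclusive-access retry loop a 64-bit atomic store reduces to on ARMv7.
    // %H selects the high register of a 64-bit register pair; the compiler's
    // 64-bit register allocation supplies the even/odd pair that A32 ldrexd/strexd need.
    static inline void AtomicStore64Ldrexd(volatile int64_t* addr, int64_t value) {
      int64_t old;      // previous value returned by ldrexd (ignored here)
      uint32_t failed;  // strexd status: 0 on success, 1 if exclusivity was lost
      do {
        __asm__ __volatile__(
            "ldrexd %0, %H0, [%3]\n"      // claim exclusive access to the doubleword
            "strexd %1, %2, %H2, [%3]\n"  // attempt to store the new value
            : "=&r"(old), "=&r"(failed)
            : "r"(value), "r"(addr)
            : "memory");
      } while (failed != 0);              // exclusivity lost to another agent: retry
      (void)old;
    }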
void clrex(Condition cond = AL) OVERRIDE; diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc index 951792d45b..4a0ae0ba99 100644 --- a/compiler/utils/arm/assembler_arm32_test.cc +++ b/compiler/utils/arm/assembler_arm32_test.cc @@ -697,4 +697,28 @@ TEST_F(AssemblerArm32Test, Vmstat) { DriverStr(expected, "vmrs"); } +TEST_F(AssemblerArm32Test, ldrexd) { + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R0); + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R1); + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R2); + + const char* expected = + "ldrexd r0, r1, [r0]\n" + "ldrexd r0, r1, [r1]\n" + "ldrexd r0, r1, [r2]\n"; + DriverStr(expected, "ldrexd"); +} + +TEST_F(AssemblerArm32Test, strexd) { + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R0); + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R1); + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R2); + + const char* expected = + "strexd r9, r0, r1, [r0]\n" + "strexd r9, r0, r1, [r1]\n" + "strexd r9, r0, r1, [r2]\n"; + DriverStr(expected, "strexd"); +} + } // namespace art diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 3eaae56376..3eccd3f9df 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -1662,9 +1662,6 @@ void Thumb2Assembler::ldrex(Register rt, Register rn, uint16_t imm, Condition co CHECK_NE(rn, kNoRegister); CHECK_NE(rt, kNoRegister); CheckCondition(cond); - CHECK_NE(rn, kNoRegister); - CHECK_NE(rt, kNoRegister); - CheckCondition(cond); CHECK_LT(imm, (1u << 10)); int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 | @@ -1701,6 +1698,22 @@ void Thumb2Assembler::strex(Register rd, } +void Thumb2Assembler::ldrexd(Register rt, Register rt2, Register rn, Condition cond) { + CHECK_NE(rn, kNoRegister); + CHECK_NE(rt, kNoRegister); + CHECK_NE(rt2, kNoRegister); + CHECK_NE(rt, rt2); + CheckCondition(cond); + + int32_t encoding = B31 | B30 | B29 | B27 | B23 | B22 | B20 | + static_cast<uint32_t>(rn) << 16 | + static_cast<uint32_t>(rt) << 12 | + static_cast<uint32_t>(rt2) << 8 | + B6 | B5 | B4 | B3 | B2 | B1 | B0; + Emit32(encoding); +} + + void Thumb2Assembler::strex(Register rd, Register rt, Register rn, @@ -1709,6 +1722,26 @@ void Thumb2Assembler::strex(Register rd, } +void Thumb2Assembler::strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond) { + CHECK_NE(rd, kNoRegister); + CHECK_NE(rn, kNoRegister); + CHECK_NE(rt, kNoRegister); + CHECK_NE(rt2, kNoRegister); + CHECK_NE(rt, rt2); + CHECK_NE(rd, rt); + CHECK_NE(rd, rt2); + CheckCondition(cond); + + int32_t encoding = B31 | B30 | B29 | B27 | B23 | B22 | + static_cast<uint32_t>(rn) << 16 | + static_cast<uint32_t>(rt) << 12 | + static_cast<uint32_t>(rt2) << 8 | + B6 | B5 | B4 | + static_cast<uint32_t>(rd); + Emit32(encoding); +} + + void Thumb2Assembler::clrex(Condition cond) { CheckCondition(cond); int32_t encoding = B31 | B30 | B29 | B27 | B28 | B25 | B24 | B23 | diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 48a3a7eeb2..81dd13894f 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -149,6 +149,8 @@ class Thumb2Assembler FINAL : public ArmAssembler { void ldrex(Register rd, Register rn, uint16_t imm, Condition cond = AL); void strex(Register rd, Register rt, Register rn, uint16_t imm, Condition cond = AL); + void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; + void 
strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; // Miscellaneous instructions. void clrex(Condition cond = AL) OVERRIDE; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 6ae95a40e6..425ccd7ea3 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -164,4 +164,32 @@ TEST_F(AssemblerThumb2Test, Vmstat) { DriverStr(expected, "vmrs"); } +TEST_F(AssemblerThumb2Test, ldrexd) { + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R0); + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R1); + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R2); + GetAssembler()->ldrexd(arm::R5, arm::R3, arm::R7); + + const char* expected = + "ldrexd r0, r1, [r0]\n" + "ldrexd r0, r1, [r1]\n" + "ldrexd r0, r1, [r2]\n" + "ldrexd r5, r3, [r7]\n"; + DriverStr(expected, "ldrexd"); +} + +TEST_F(AssemblerThumb2Test, strexd) { + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R0); + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R1); + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R2); + GetAssembler()->strexd(arm::R9, arm::R5, arm::R3, arm::R7); + + const char* expected = + "strexd r9, r0, r1, [r0]\n" + "strexd r9, r0, r1, [r1]\n" + "strexd r9, r0, r1, [r2]\n" + "strexd r9, r5, r3, [r7]\n"; + DriverStr(expected, "strexd"); +} + } // namespace art diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index f0353f6cd2..f8c0043242 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -443,6 +443,27 @@ void X86Assembler::movsd(XmmRegister dst, XmmRegister src) { } +void X86Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::punpckldq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x62); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::addsd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 9fecf1edf0..6c3d13122c 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -274,6 +274,9 @@ class X86Assembler FINAL : public Assembler { void movsd(const Address& dst, XmmRegister src); void movsd(XmmRegister dst, XmmRegister src); + void psrlq(XmmRegister reg, const Immediate& shift_count); + void punpckldq(XmmRegister dst, XmmRegister src); + void addsd(XmmRegister dst, XmmRegister src); void addsd(XmmRegister dst, const Address& src); void subsd(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index d901673691..fccb510afb 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -105,6 +105,18 @@ TEST_F(AssemblerX86Test, Movl) { DriverStr(expected, "movl"); } +TEST_F(AssemblerX86Test, psrlq) { + GetAssembler()->psrlq(x86::XMM0, CreateImmediate(32)); + const char* expected = "psrlq $0x20, %xmm0\n"; + DriverStr(expected, "psrlq"); +} + +TEST_F(AssemblerX86Test, punpckldq) { + GetAssembler()->punpckldq(x86::XMM0, x86::XMM1); + const char* 
expected = "punpckldq %xmm1, %xmm0\n"; + DriverStr(expected, "punpckldq"); +} + TEST_F(AssemblerX86Test, LoadLongConstant) { GetAssembler()->LoadLongConstant(x86::XMM0, 51); const char* expected = diff --git a/runtime/Android.mk b/runtime/Android.mk index ca29eba4ee..13a216c48b 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -67,6 +67,7 @@ LIBART_COMMON_SRC_FILES := \ gc/space/rosalloc_space.cc \ gc/space/space.cc \ gc/space/zygote_space.cc \ + gc/task_processor.cc \ hprof/hprof.cc \ image.cc \ indirect_reference_table.cc \ diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 8f09e074f7..26d6117122 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -52,6 +52,7 @@ #include "gc/space/rosalloc_space-inl.h" #include "gc/space/space-inl.h" #include "gc/space/zygote_space.h" +#include "gc/task_processor.h" #include "entrypoints/quick/quick_alloc_entrypoints.h" #include "heap-inl.h" #include "image.h" @@ -129,10 +130,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max foreground_collector_type_(foreground_collector_type), background_collector_type_(background_collector_type), desired_collector_type_(foreground_collector_type_), - heap_trim_request_lock_(nullptr), - last_trim_time_(0), - heap_transition_or_trim_target_time_(0), - heap_trim_request_pending_(false), + pending_task_lock_(nullptr), parallel_gc_threads_(parallel_gc_threads), conc_gc_threads_(conc_gc_threads), low_memory_mode_(low_memory_mode), @@ -142,8 +140,6 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max zygote_creation_lock_("zygote creation lock", kZygoteCreationLock), zygote_space_(nullptr), large_object_threshold_(large_object_threshold), - gc_request_pending_(false), - conc_gc_running_(false), collector_type_running_(kCollectorTypeNone), last_gc_type_(collector::kGcTypeNone), next_gc_type_(collector::kGcTypePartial), @@ -194,6 +190,8 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max min_interval_homogeneous_space_compaction_by_oom_( min_interval_homogeneous_space_compaction_by_oom), last_time_homogeneous_space_compaction_by_oom_(NanoTime()), + pending_collector_transition_(nullptr), + pending_heap_trim_(nullptr), use_homogeneous_space_compaction_for_oom_(use_homogeneous_space_compaction_for_oom) { if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) { LOG(INFO) << "Heap() entering"; @@ -409,9 +407,8 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max gc_complete_lock_ = new Mutex("GC complete lock"); gc_complete_cond_.reset(new ConditionVariable("GC complete condition variable", *gc_complete_lock_)); - gc_request_lock_ = new Mutex("GC request lock"); - gc_request_cond_.reset(new ConditionVariable("GC request condition variable", *gc_request_lock_)); - heap_trim_request_lock_ = new Mutex("Heap trim request lock"); + task_processor_.reset(new TaskProcessor()); + pending_task_lock_ = new Mutex("Pending task lock"); if (ignore_max_footprint_) { SetIdealFootprint(std::numeric_limits<size_t>::max()); concurrent_start_bytes_ = std::numeric_limits<size_t>::max(); @@ -719,8 +716,8 @@ void Heap::VisitObjects(ObjectCallback callback, void* arg) { mirror::Object* obj = *it; if (obj != nullptr && obj->GetClass() != nullptr) { // Avoid the race condition caused by the object not yet being written into the allocation - // stack or the class not yet being written in the object. Or, if kUseThreadLocalAllocationStack, - // there can be nulls on the allocation stack. 
+ // stack or the class not yet being written in the object. Or, if + // kUseThreadLocalAllocationStack, there can be nulls on the allocation stack. callback(obj, arg); } } @@ -872,8 +869,7 @@ Heap::~Heap() { STLDeleteElements(&continuous_spaces_); STLDeleteElements(&discontinuous_spaces_); delete gc_complete_lock_; - delete gc_request_lock_; - delete heap_trim_request_lock_; + delete pending_task_lock_; VLOG(heap) << "Finished ~Heap()"; } @@ -944,37 +940,23 @@ void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType self->ThrowOutOfMemoryError(oss.str().c_str()); } -void Heap::DoPendingTransitionOrTrim() { - Thread* self = Thread::Current(); - CollectorType desired_collector_type; - // Wait until we reach the desired transition time. - while (true) { - uint64_t wait_time; - { - MutexLock mu(self, *heap_trim_request_lock_); - desired_collector_type = desired_collector_type_; - uint64_t current_time = NanoTime(); - if (current_time >= heap_transition_or_trim_target_time_) { - break; - } - wait_time = heap_transition_or_trim_target_time_ - current_time; - } - ScopedThreadStateChange tsc(self, kSleeping); - usleep(wait_time / 1000); // Usleep takes microseconds. - } +void Heap::DoPendingCollectorTransition() { + CollectorType desired_collector_type = desired_collector_type_; // Launch homogeneous space compaction if it is desired. if (desired_collector_type == kCollectorTypeHomogeneousSpaceCompact) { if (!CareAboutPauseTimes()) { PerformHomogeneousSpaceCompact(); + } else { + VLOG(gc) << "Homogeneous compaction ignored due to jank perceptible process state"; } - // No need to Trim(). Homogeneous space compaction may free more virtual and physical memory. - desired_collector_type = collector_type_; - return; + } else { + TransitionCollector(desired_collector_type); } - // Transition the collector if the desired collector type is not the same as the current - // collector type. - TransitionCollector(desired_collector_type); +} + +void Heap::Trim(Thread* self) { if (!CareAboutPauseTimes()) { + ATRACE_BEGIN("Deflating monitors"); // Deflate the monitors, this can cause a pause but shouldn't matter since we don't care // about pauses. Runtime* runtime = Runtime::Current(); @@ -984,9 +966,10 @@ void Heap::DoPendingTransitionOrTrim() { VLOG(heap) << "Deflating " << count << " monitors took " << PrettyDuration(NanoTime() - start_time); runtime->GetThreadList()->ResumeAll(); + ATRACE_END(); } - // Do a heap trim if it is needed. - Trim(); + TrimIndirectReferenceTables(self); + TrimSpaces(self); } class TrimIndirectReferenceTableClosure : public Closure { @@ -1004,17 +987,22 @@ class TrimIndirectReferenceTableClosure : public Closure { Barrier* const barrier_; }; - -void Heap::Trim() { - Thread* self = Thread::Current(); - { - MutexLock mu(self, *heap_trim_request_lock_); - if (!heap_trim_request_pending_ || last_trim_time_ + kHeapTrimWait >= NanoTime()) { - return; - } - last_trim_time_ = NanoTime(); - heap_trim_request_pending_ = false; - } +void Heap::TrimIndirectReferenceTables(Thread* self) { + ScopedObjectAccess soa(self); + ATRACE_BEGIN(__FUNCTION__); + JavaVMExt* vm = soa.Vm(); + // Trim globals indirect reference table. + vm->TrimGlobals(); + // Trim locals indirect reference tables. 
+ Barrier barrier(0); + TrimIndirectReferenceTableClosure closure(&barrier); + ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun); + size_t barrier_count = Runtime::Current()->GetThreadList()->RunCheckpoint(&closure); + barrier.Increment(self, barrier_count); + ATRACE_END(); +} + +void Heap::TrimSpaces(Thread* self) { { // Need to do this before acquiring the locks since we don't want to get suspended while // holding any locks. @@ -1026,20 +1014,8 @@ void Heap::Trim() { WaitForGcToCompleteLocked(kGcCauseTrim, self); collector_type_running_ = kCollectorTypeHeapTrim; } - // Trim reference tables. - { - ScopedObjectAccess soa(self); - JavaVMExt* vm = soa.Vm(); - // Trim globals indirect reference table. - vm->TrimGlobals(); - // Trim locals indirect reference tables. - Barrier barrier(0); - TrimIndirectReferenceTableClosure closure(&barrier); - ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun); - size_t barrier_count = Runtime::Current()->GetThreadList()->RunCheckpoint(&closure); - barrier.Increment(self, barrier_count); - } - uint64_t start_ns = NanoTime(); + ATRACE_BEGIN(__FUNCTION__); + const uint64_t start_ns = NanoTime(); // Trim the managed spaces. uint64_t total_alloc_space_allocated = 0; uint64_t total_alloc_space_size = 0; @@ -1089,6 +1065,7 @@ void Heap::Trim() { << PrettyDuration(end_ns - gc_heap_end_ns) << ", advised=" << PrettySize(native_reclaimed) << ") heaps. Managed heap utilization of " << static_cast<int>(100 * managed_utilization) << "%."; + ATRACE_END(); } bool Heap::IsValidObjectAddress(const mirror::Object* obj) const { @@ -1639,7 +1616,6 @@ HomogeneousSpaceCompactResult Heap::PerformHomogeneousSpaceCompact() { return HomogeneousSpaceCompactResult::kSuccess; } - void Heap::TransitionCollector(CollectorType collector_type) { if (collector_type == collector_type_) { return; @@ -2207,7 +2183,7 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCaus collector->Run(gc_cause, clear_soft_references || runtime->IsZygote()); total_objects_freed_ever_ += GetCurrentGcIteration()->GetFreedObjects(); total_bytes_freed_ever_ += GetCurrentGcIteration()->GetFreedBytes(); - RequestHeapTrim(); + RequestTrim(self); // Enqueue cleared references. reference_processor_.EnqueueClearedReferences(self); // Grow the heap so that we know when to perform the next GC. @@ -3032,52 +3008,109 @@ void Heap::RequestConcurrentGCAndSaveObject(Thread* self, mirror::Object** obj) RequestConcurrentGC(self); } -void Heap::RequestConcurrentGC(Thread* self) { - // Make sure that we can do a concurrent GC. 
+class Heap::ConcurrentGCTask : public HeapTask { + public: + explicit ConcurrentGCTask(uint64_t target_time) : HeapTask(target_time) { } + virtual void Run(Thread* self) OVERRIDE { + gc::Heap* heap = Runtime::Current()->GetHeap(); + heap->ConcurrentGC(self); + heap->ClearConcurrentGCRequest(); + } +}; + +static bool CanAddHeapTask(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_) { Runtime* runtime = Runtime::Current(); - if (runtime == nullptr || !runtime->IsFinishedStarting() || runtime->IsShuttingDown(self) || - self->IsHandlingStackOverflow()) { - return; + return runtime != nullptr && runtime->IsFinishedStarting() && !runtime->IsShuttingDown(self) && + !self->IsHandlingStackOverflow(); +} + +void Heap::ClearConcurrentGCRequest() { + concurrent_gc_pending_.StoreRelaxed(false); +} + +void Heap::RequestConcurrentGC(Thread* self) { + if (CanAddHeapTask(self) && + concurrent_gc_pending_.CompareExchangeStrongSequentiallyConsistent(false, true)) { + task_processor_->AddTask(self, new ConcurrentGCTask(NanoTime())); // Start straight away. } - NotifyConcurrentGCRequest(self); } void Heap::ConcurrentGC(Thread* self) { - if (Runtime::Current()->IsShuttingDown(self)) { - return; - } - // Wait for any GCs currently running to finish. - if (WaitForGcToComplete(kGcCauseBackground, self) == collector::kGcTypeNone) { - // If the we can't run the GC type we wanted to run, find the next appropriate one and try that - // instead. E.g. can't do partial, so do full instead. - if (CollectGarbageInternal(next_gc_type_, kGcCauseBackground, false) == - collector::kGcTypeNone) { - for (collector::GcType gc_type : gc_plan_) { - // Attempt to run the collector, if we succeed, we are done. - if (gc_type > next_gc_type_ && - CollectGarbageInternal(gc_type, kGcCauseBackground, false) != collector::kGcTypeNone) { - break; + if (!Runtime::Current()->IsShuttingDown(self)) { + // Wait for any GCs currently running to finish. + if (WaitForGcToComplete(kGcCauseBackground, self) == collector::kGcTypeNone) { + // If we can't run the GC type we wanted to run, find the next appropriate one and try that + // instead. E.g. can't do partial, so do full instead. + if (CollectGarbageInternal(next_gc_type_, kGcCauseBackground, false) == - collector::kGcTypeNone) { + for (collector::GcType gc_type : gc_plan_) { + // Attempt to run the collector, if we succeed, we are done.
+ if (gc_type > next_gc_type_ && + CollectGarbageInternal(gc_type, kGcCauseBackground, false) != + collector::kGcTypeNone) { + break; + } } } } } } +class Heap::CollectorTransitionTask : public HeapTask { + public: + explicit CollectorTransitionTask(uint64_t target_time) : HeapTask(target_time) { } + virtual void Run(Thread* self) OVERRIDE { + gc::Heap* heap = Runtime::Current()->GetHeap(); + heap->DoPendingCollectorTransition(); + heap->ClearPendingCollectorTransition(self); + } +}; + +void Heap::ClearPendingCollectorTransition(Thread* self) { + MutexLock mu(self, *pending_task_lock_); + pending_collector_transition_ = nullptr; +} + void Heap::RequestCollectorTransition(CollectorType desired_collector_type, uint64_t delta_time) { Thread* self = Thread::Current(); + desired_collector_type_ = desired_collector_type; + if (desired_collector_type_ == collector_type_ || !CanAddHeapTask(self)) { + return; + } + CollectorTransitionTask* added_task = nullptr; + const uint64_t target_time = NanoTime() + delta_time; { - MutexLock mu(self, *heap_trim_request_lock_); - if (desired_collector_type_ == desired_collector_type) { + MutexLock mu(self, *pending_task_lock_); + // If we have an existing collector transition, update the target time to be the new target. + if (pending_collector_transition_ != nullptr) { + task_processor_->UpdateTargetRunTime(self, pending_collector_transition_, target_time); return; } - heap_transition_or_trim_target_time_ = - std::max(heap_transition_or_trim_target_time_, NanoTime() + delta_time); - desired_collector_type_ = desired_collector_type; + added_task = new CollectorTransitionTask(target_time); + pending_collector_transition_ = added_task; + } + task_processor_->AddTask(self, added_task); +} + +class Heap::HeapTrimTask : public HeapTask { + public: + explicit HeapTrimTask(uint64_t delta_time) : HeapTask(NanoTime() + delta_time) { } + virtual void Run(Thread* self) OVERRIDE { + gc::Heap* heap = Runtime::Current()->GetHeap(); + heap->Trim(self); + heap->ClearPendingTrim(self); } - SignalHeapTrimDaemon(self); +}; + +void Heap::ClearPendingTrim(Thread* self) { + MutexLock mu(self, *pending_task_lock_); + pending_heap_trim_ = nullptr; } -void Heap::RequestHeapTrim() { +void Heap::RequestTrim(Thread* self) { + if (!CanAddHeapTask(self)) { + return; + } // GC completed and now we must decide whether to request a heap trim (advising pages back to the // kernel) or not. Issuing a request will also cause trimming of the libc heap. As a trim scans // a space it will hold its lock and can become a cause of jank. @@ -3090,42 +3123,17 @@ void Heap::RequestHeapTrim() { // to utilization (which is probably inversely proportional to how much benefit we can expect). // We could try mincore(2) but that's only a measure of how many pages we haven't given away, // not how much use we're making of those pages. - - Thread* self = Thread::Current(); - Runtime* runtime = Runtime::Current(); - if (runtime == nullptr || !runtime->IsFinishedStarting() || runtime->IsShuttingDown(self) || - runtime->IsZygote()) { - // Ignore the request if we are the zygote to prevent app launching lag due to sleep in heap - // trimmer daemon. b/17310019 - // Heap trimming isn't supported without a Java runtime or Daemons (such as at dex2oat time) - // Also: we do not wish to start a heap trim if the runtime is shutting down (a racy check - // as we don't hold the lock while requesting the trim).
- return; - } + HeapTrimTask* added_task = nullptr; { - MutexLock mu(self, *heap_trim_request_lock_); - if (last_trim_time_ + kHeapTrimWait >= NanoTime()) { - // We have done a heap trim in the last kHeapTrimWait nanosecs, don't request another one - // just yet. + MutexLock mu(self, *pending_task_lock_); + if (pending_heap_trim_ != nullptr) { + // Already have a heap trim request in task processor, ignore this request. return; } - heap_trim_request_pending_ = true; - uint64_t current_time = NanoTime(); - if (heap_transition_or_trim_target_time_ < current_time) { - heap_transition_or_trim_target_time_ = current_time + kHeapTrimWait; - } + added_task = new HeapTrimTask(kHeapTrimWait); + pending_heap_trim_ = added_task; } - // Notify the daemon thread which will actually do the heap trim. - SignalHeapTrimDaemon(self); -} - -void Heap::SignalHeapTrimDaemon(Thread* self) { - JNIEnv* env = self->GetJniEnv(); - DCHECK(WellKnownClasses::java_lang_Daemons != nullptr); - DCHECK(WellKnownClasses::java_lang_Daemons_requestHeapTrim != nullptr); - env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons, - WellKnownClasses::java_lang_Daemons_requestHeapTrim); - CHECK(!env->ExceptionCheck()); + task_processor_->AddTask(self, added_task); } void Heap::RevokeThreadLocalBuffers(Thread* thread) { @@ -3153,7 +3161,7 @@ void Heap::RevokeAllThreadLocalBuffers() { } bool Heap::IsGCRequestPending() const { - return concurrent_start_bytes_ != std::numeric_limits<size_t>::max(); + return concurrent_gc_pending_.LoadRelaxed(); } void Heap::RunFinalization(JNIEnv* env) { @@ -3235,7 +3243,7 @@ void Heap::AddModUnionTable(accounting::ModUnionTable* mod_union_table) { } void Heap::CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count) { - CHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) || + CHECK(c == nullptr || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) || (c->IsVariableSize() || c->GetObjectSize() == byte_count)); CHECK_GE(byte_count, sizeof(mirror::Object)); } @@ -3272,25 +3280,5 @@ void Heap::ClearMarkedObjects() { } } -void Heap::WaitForConcurrentGCRequest(Thread* self) { - ScopedThreadStateChange tsc(self, kBlocked); - MutexLock mu(self, *gc_request_lock_); - conc_gc_running_ = false; - while (!gc_request_pending_) { - gc_request_cond_->Wait(self); - } - gc_request_pending_ = false; - conc_gc_running_ = true; -} - -void Heap::NotifyConcurrentGCRequest(Thread* self) { - ScopedThreadStateChange tsc(self, kBlocked); - MutexLock mu(self, *gc_request_lock_); - if (!conc_gc_running_) { - gc_request_pending_ = true; - gc_request_cond_->Signal(self); - } -} - } // namespace gc } // namespace art diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index cf94eb6a9d..1738124c0c 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -57,6 +57,7 @@ namespace mirror { namespace gc { class ReferenceProcessor; +class TaskProcessor; namespace accounting { class HeapBitmap; @@ -470,11 +471,11 @@ class Heap { void DumpForSigQuit(std::ostream& os); - // Do a pending heap transition or trim. - void DoPendingTransitionOrTrim() LOCKS_EXCLUDED(heap_trim_request_lock_); + // Do a pending collector transition. + void DoPendingCollectorTransition(); - // Trim the managed and native heaps by releasing unused memory back to the OS. - void Trim() LOCKS_EXCLUDED(heap_trim_request_lock_); + // Deflate monitors, ... and trim the spaces. 
+ void Trim(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_); void RevokeThreadLocalBuffers(Thread* thread); void RevokeRosAllocThreadLocalBuffers(Thread* thread); @@ -606,15 +607,25 @@ class Heap { ReferenceProcessor* GetReferenceProcessor() { return &reference_processor_; } + TaskProcessor* GetTaskProcessor() { + return task_processor_.get(); + } bool HasZygoteSpace() const { return zygote_space_ != nullptr; } - void WaitForConcurrentGCRequest(Thread* self) LOCKS_EXCLUDED(gc_request_lock_); - void NotifyConcurrentGCRequest(Thread* self) LOCKS_EXCLUDED(gc_request_lock_); + // Request an asynchronous trim. + void RequestTrim(Thread* self) LOCKS_EXCLUDED(pending_task_lock_); + + // Request asynchronous GC. + void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(pending_task_lock_); private: + class ConcurrentGCTask; + class CollectorTransitionTask; + class HeapTrimTask; + // Compact source space to target space. void Compact(space::ContinuousMemMapAllocSpace* target_space, space::ContinuousMemMapAllocSpace* source_space, @@ -705,12 +716,10 @@ class Heap { EXCLUSIVE_LOCKS_REQUIRED(gc_complete_lock_); void RequestCollectorTransition(CollectorType desired_collector_type, uint64_t delta_time) - LOCKS_EXCLUDED(heap_trim_request_lock_); - void RequestHeapTrim() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_); + LOCKS_EXCLUDED(pending_task_lock_); + void RequestConcurrentGCAndSaveObject(Thread* self, mirror::Object** obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void RequestConcurrentGC(Thread* self) - LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_); bool IsGCRequestPending() const; // Sometimes CollectGarbageInternal decides to run a different Gc than you requested. Returns @@ -771,10 +780,6 @@ class Heap { // Clear cards and update the mod union table. void ProcessCards(TimingLogger* timings, bool use_rem_sets); - // Signal the heap trim daemon that there is something to do, either a heap transition or heap - // trim. - void SignalHeapTrimDaemon(Thread* self); - // Push an object onto the allocation stack. void PushOnAllocationStack(Thread* self, mirror::Object** obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -783,12 +788,22 @@ class Heap { void PushOnThreadLocalAllocationStackWithInternalGC(Thread* thread, mirror::Object** obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void ClearConcurrentGCRequest(); + void ClearPendingTrim(Thread* self) LOCKS_EXCLUDED(pending_task_lock_); + void ClearPendingCollectorTransition(Thread* self) LOCKS_EXCLUDED(pending_task_lock_); + // What kind of concurrency behavior is the runtime after? Currently true for concurrent mark // sweep GC, false for other GC types. bool IsGcConcurrent() const ALWAYS_INLINE { return collector_type_ == kCollectorTypeCMS || collector_type_ == kCollectorTypeCC; } + // Trim the managed and native spaces by releasing unused memory back to the OS. + void TrimSpaces(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_); + + // Trim 0 pages at the end of reference tables. + void TrimIndirectReferenceTables(Thread* self); + // All-known continuous spaces, where objects lie within fixed bounds. std::vector<space::ContinuousSpace*> continuous_spaces_; @@ -835,14 +850,8 @@ class Heap { // Desired collector type, heap trimming daemon transitions the heap if it is != collector_type_. CollectorType desired_collector_type_; - // Lock which guards heap trim requests. - Mutex* heap_trim_request_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - // When we want to perform the next heap trim (nano seconds). 
- uint64_t last_trim_time_ GUARDED_BY(heap_trim_request_lock_); - // When we want to perform the next heap transition (nano seconds) or heap trim. - uint64_t heap_transition_or_trim_target_time_ GUARDED_BY(heap_trim_request_lock_); - // If we have a heap trim request pending. - bool heap_trim_request_pending_ GUARDED_BY(heap_trim_request_lock_); + // Lock which guards pending tasks. + Mutex* pending_task_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; // How many GC threads we may use for paused parts of garbage collection. const size_t parallel_gc_threads_; @@ -879,15 +888,12 @@ class Heap { Mutex* gc_complete_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; std::unique_ptr<ConditionVariable> gc_complete_cond_ GUARDED_BY(gc_complete_lock_); - // Guards concurrent GC requests. - Mutex* gc_request_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - std::unique_ptr<ConditionVariable> gc_request_cond_ GUARDED_BY(gc_request_lock_); - bool gc_request_pending_ GUARDED_BY(gc_request_lock_); - bool conc_gc_running_ GUARDED_BY(gc_request_lock_); - // Reference processor; ReferenceProcessor reference_processor_; + // Task processor, proxies heap trim requests to the daemon threads. + std::unique_ptr<TaskProcessor> task_processor_; + // True while the garbage collector is running. volatile CollectorType collector_type_running_ GUARDED_BY(gc_complete_lock_); @@ -1060,9 +1066,17 @@ class Heap { // Count for performed homogeneous space compaction. Atomic<size_t> count_performed_homogeneous_space_compaction_; + // Whether or not a concurrent GC is pending. + Atomic<bool> concurrent_gc_pending_; + + // Active tasks which we can modify (change target time, desired collector type, etc..). + CollectorTransitionTask* pending_collector_transition_ GUARDED_BY(pending_task_lock_); + HeapTrimTask* pending_heap_trim_ GUARDED_BY(pending_task_lock_); + // Whether or not we use homogeneous space compaction to avoid OOM errors. bool use_homogeneous_space_compaction_for_oom_; + friend class CollectorTransitionTask; friend class collector::GarbageCollector; friend class collector::MarkCompact; friend class collector::MarkSweep; diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc index 99bd63fa8a..01e8795669 100644 --- a/runtime/gc/reference_processor.cc +++ b/runtime/gc/reference_processor.cc @@ -23,11 +23,14 @@ #include "reflection.h" #include "ScopedLocalRef.h" #include "scoped_thread_state_change.h" +#include "task_processor.h" #include "well_known_classes.h" namespace art { namespace gc { +static constexpr bool kAsyncReferenceQueueAdd = false; + ReferenceProcessor::ReferenceProcessor() : process_references_args_(nullptr, nullptr, nullptr), preserving_references_(false), @@ -213,17 +216,43 @@ void ReferenceProcessor::UpdateRoots(IsMarkedCallback* callback, void* arg) { cleared_references_.UpdateRoots(callback, arg); } +class ClearedReferenceTask : public HeapTask { + public: + explicit ClearedReferenceTask(jobject cleared_references) + : HeapTask(NanoTime()), cleared_references_(cleared_references) { + } + virtual void Run(Thread* thread) { + ScopedObjectAccess soa(thread); + jvalue args[1]; + args[0].l = cleared_references_; + InvokeWithJValues(soa, nullptr, WellKnownClasses::java_lang_ref_ReferenceQueue_add, args); + soa.Env()->DeleteGlobalRef(cleared_references_); + } + + private: + const jobject cleared_references_; +}; + void ReferenceProcessor::EnqueueClearedReferences(Thread* self) { Locks::mutator_lock_->AssertNotHeld(self); + // When a runtime isn't started there are no reference queues to care about so ignore. 
if (!cleared_references_.IsEmpty()) { - // When a runtime isn't started there are no reference queues to care about so ignore. if (LIKELY(Runtime::Current()->IsStarted())) { - ScopedObjectAccess soa(self); - ScopedLocalRef<jobject> arg(self->GetJniEnv(), - soa.AddLocalReference<jobject>(cleared_references_.GetList())); - jvalue args[1]; - args[0].l = arg.get(); - InvokeWithJValues(soa, nullptr, WellKnownClasses::java_lang_ref_ReferenceQueue_add, args); + jobject cleared_references; + { + ReaderMutexLock mu(self, *Locks::mutator_lock_); + cleared_references = self->GetJniEnv()->vm->AddGlobalRef( + self, cleared_references_.GetList()); + } + if (kAsyncReferenceQueueAdd) { + // TODO: This can cause RunFinalization to terminate before newly freed objects are + // finalized since they may not be enqueued by the time RunFinalization starts. + Runtime::Current()->GetHeap()->GetTaskProcessor()->AddTask( + self, new ClearedReferenceTask(cleared_references)); + } else { + ClearedReferenceTask task(cleared_references); + task.Run(self); + } } cleared_references_.Clear(); } diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc index 74d1a2b7db..ced25a40bb 100644 --- a/runtime/gc/space/rosalloc_space.cc +++ b/runtime/gc/space/rosalloc_space.cc @@ -365,8 +365,9 @@ void RosAllocSpace::Clear() { mark_bitmap_->Clear(); SetEnd(begin_ + starting_size_); delete rosalloc_; - rosalloc_ = CreateRosAlloc(mem_map_->Begin(), starting_size_, initial_size_, Capacity(), - low_memory_mode_, Runtime::Current()->RunningOnValgrind()); + rosalloc_ = CreateRosAlloc(mem_map_->Begin(), starting_size_, initial_size_, + NonGrowthLimitCapacity(), low_memory_mode_, + Runtime::Current()->RunningOnValgrind()); SetFootprintLimit(footprint_limit); } diff --git a/runtime/gc/task_processor.cc b/runtime/gc/task_processor.cc new file mode 100644 index 0000000000..1a3c6f5399 --- /dev/null +++ b/runtime/gc/task_processor.cc @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "task_processor.h" + +#include "scoped_thread_state_change.h" + +namespace art { +namespace gc { + +TaskProcessor::TaskProcessor() + : lock_(new Mutex("Task processor lock", kReferenceProcessorLock)), is_running_(false) { + // Piggyback off the reference processor lock level. + cond_.reset(new ConditionVariable("Task processor condition", *lock_)); +} + +TaskProcessor::~TaskProcessor() { + delete lock_; +} + +void TaskProcessor::AddTask(Thread* self, HeapTask* task) { + ScopedThreadStateChange tsc(self, kBlocked); + MutexLock mu(self, *lock_); + tasks_.insert(task); + cond_->Signal(self); +} + +HeapTask* TaskProcessor::GetTask(Thread* self) { + ScopedThreadStateChange tsc(self, kBlocked); + MutexLock mu(self, *lock_); + while (true) { + if (tasks_.empty()) { + if (!is_running_) { + return nullptr; + } + cond_->Wait(self); // Empty queue, wait until we are signalled. 
+ } else { + // Non empty queue, look at the top element and see if we are ready to run it. + const uint64_t current_time = NanoTime(); + HeapTask* task = *tasks_.begin(); + // If we are shutting down, return the task right away without waiting. Otherwise return the + // task if it is late enough. + uint64_t target_time = task->GetTargetRunTime(); + if (!is_running_ || target_time <= current_time) { + tasks_.erase(tasks_.begin()); + return task; + } + DCHECK_GT(target_time, current_time); + // Wait until we hit the target run time. + const uint64_t delta_time = target_time - current_time; + const uint64_t ms_delta = NsToMs(delta_time); + const uint64_t ns_delta = delta_time - MsToNs(ms_delta); + cond_->TimedWait(self, static_cast<int64_t>(ms_delta), static_cast<int32_t>(ns_delta)); + } + } + UNREACHABLE(); + return nullptr; +} + +void TaskProcessor::UpdateTargetRunTime(Thread* self, HeapTask* task, uint64_t new_target_time) { + MutexLock mu(self, *lock_); + // Find the task. + auto range = tasks_.equal_range(task); + for (auto it = range.first; it != range.second; ++it) { + if (*it == task) { + // Check if the target time was updated, if so re-insert then wait. + if (new_target_time != task->GetTargetRunTime()) { + tasks_.erase(it); + task->SetTargetRunTime(new_target_time); + tasks_.insert(task); + // If we became the first task then we may need to signal since we changed the task that we + // are sleeping on. + if (*tasks_.begin() == task) { + cond_->Signal(self); + } + return; + } + } + } +} + +bool TaskProcessor::IsRunning() const { + MutexLock mu(Thread::Current(), *lock_); + return is_running_; +} + +void TaskProcessor::Stop(Thread* self) { + MutexLock mu(self, *lock_); + is_running_ = false; + cond_->Broadcast(self); +} + +void TaskProcessor::Start(Thread* self) { + MutexLock mu(self, *lock_); + is_running_ = true; +} + +void TaskProcessor::RunAllTasks(Thread* self) { + while (true) { + // Wait and get a task, may be interrupted. + HeapTask* task = GetTask(self); + if (task != nullptr) { + task->Run(self); + task->Finalize(); + } else if (!IsRunning()) { + break; + } + } +} + +} // namespace gc +} // namespace art diff --git a/runtime/gc/task_processor.h b/runtime/gc/task_processor.h new file mode 100644 index 0000000000..765f03557e --- /dev/null +++ b/runtime/gc/task_processor.h @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef ART_RUNTIME_GC_TASK_PROCESSOR_H_ +#define ART_RUNTIME_GC_TASK_PROCESSOR_H_ + +#include <memory> +#include <set> + +#include "base/mutex.h" +#include "globals.h" +#include "thread_pool.h" + +namespace art { +namespace gc { + +class HeapTask : public SelfDeletingTask { + public: + explicit HeapTask(uint64_t target_run_time) : target_run_time_(target_run_time) { + } + uint64_t GetTargetRunTime() const { + return target_run_time_; + } + + private: + // Update the updated_target_run_time_, the task processor will re-insert the task when it is + // popped and update the target_run_time_. + void SetTargetRunTime(uint64_t new_target_run_time) { + target_run_time_ = new_target_run_time; + } + + // Time in ns at which we want the task to run. + uint64_t target_run_time_; + + friend class TaskProcessor; +}; + +// Used to process GC tasks (heap trim, heap transitions, concurrent GC). +class TaskProcessor { + public: + TaskProcessor(); + virtual ~TaskProcessor(); + void AddTask(Thread* self, HeapTask* task) LOCKS_EXCLUDED(lock_); + HeapTask* GetTask(Thread* self) LOCKS_EXCLUDED(lock_); + void Start(Thread* self) LOCKS_EXCLUDED(lock_); + // Stop tells the RunAllTasks to finish up the remaining tasks as soon as + // possible then return. + void Stop(Thread* self) LOCKS_EXCLUDED(lock_); + void RunAllTasks(Thread* self) LOCKS_EXCLUDED(lock_); + bool IsRunning() const LOCKS_EXCLUDED(lock_); + void UpdateTargetRunTime(Thread* self, HeapTask* target_time, uint64_t new_target_time) + LOCKS_EXCLUDED(lock_); + + private: + class CompareByTargetRunTime { + public: + bool operator()(const HeapTask* a, const HeapTask* b) const { + return a->GetTargetRunTime() < b->GetTargetRunTime(); + } + }; + + mutable Mutex* lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + bool is_running_ GUARDED_BY(lock_); + std::unique_ptr<ConditionVariable> cond_ GUARDED_BY(lock_); + std::multiset<HeapTask*, CompareByTargetRunTime> tasks_ GUARDED_BY(lock_); +}; + +} // namespace gc +} // namespace art + +#endif // ART_RUNTIME_GC_TASK_PROCESSOR_H_ diff --git a/runtime/gc/task_processor_test.cc b/runtime/gc/task_processor_test.cc new file mode 100644 index 0000000000..5dd6d8fb7b --- /dev/null +++ b/runtime/gc/task_processor_test.cc @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common_runtime_test.h" +#include "task_processor.h" +#include "thread_pool.h" +#include "thread-inl.h" +#include "utils.h" + +namespace art { +namespace gc { + +class TaskProcessorTest : public CommonRuntimeTest { + public: +}; + +class RecursiveTask : public HeapTask { + public: + RecursiveTask(TaskProcessor* task_processor, Atomic<size_t>* counter, size_t max_recursion) + : HeapTask(NanoTime() + MsToNs(10)), task_processor_(task_processor), counter_(counter), + max_recursion_(max_recursion) { + } + virtual void Run(Thread* self) OVERRIDE { + if (max_recursion_ > 0) { + task_processor_->AddTask(self, + new RecursiveTask(task_processor_, counter_, max_recursion_ - 1)); + counter_->FetchAndAddSequentiallyConsistent(1U); + } + } + + private: + TaskProcessor* const task_processor_; + Atomic<size_t>* const counter_; + const size_t max_recursion_; +}; + +class WorkUntilDoneTask : public SelfDeletingTask { + public: + WorkUntilDoneTask(TaskProcessor* task_processor, Atomic<bool>* done_running) + : task_processor_(task_processor), done_running_(done_running) { + } + virtual void Run(Thread* self) OVERRIDE { + task_processor_->RunAllTasks(self); + done_running_->StoreSequentiallyConsistent(true); + } + + private: + TaskProcessor* const task_processor_; + Atomic<bool>* done_running_; +}; + +TEST_F(TaskProcessorTest, Interrupt) { + ThreadPool thread_pool("task processor test", 1U); + Thread* const self = Thread::Current(); + TaskProcessor task_processor; + static constexpr size_t kRecursion = 10; + Atomic<bool> done_running(false); + Atomic<size_t> counter(0); + task_processor.AddTask(self, new RecursiveTask(&task_processor, &counter, kRecursion)); + task_processor.Start(self); + // Add a task which will wait until interrupted to the thread pool. + thread_pool.AddTask(self, new WorkUntilDoneTask(&task_processor, &done_running)); + thread_pool.StartWorkers(self); + ASSERT_FALSE(done_running); + // Wait until all the tasks are done, but since we didn't interrupt, done_running should be 0. + while (counter.LoadSequentiallyConsistent() != kRecursion) { + usleep(10); + } + ASSERT_FALSE(done_running); + task_processor.Stop(self); + thread_pool.Wait(self, true, false); + // After the interrupt and wait, the WorkUntilDoneTask task should have terminated and + // set done_running_ to true. + ASSERT_TRUE(done_running.LoadSequentiallyConsistent()); + + // Test that we finish remaining tasks before returning from RunAllTasks. + counter.StoreSequentiallyConsistent(0); + done_running.StoreSequentiallyConsistent(false); + // Self interrupt before any of the other tasks run, but since we added them we should keep on + // working until all the tasks are completed. + task_processor.Stop(self); + task_processor.AddTask(self, new RecursiveTask(&task_processor, &counter, kRecursion)); + thread_pool.AddTask(self, new WorkUntilDoneTask(&task_processor, &done_running)); + thread_pool.StartWorkers(self); + thread_pool.Wait(self, true, false); + ASSERT_TRUE(done_running.LoadSequentiallyConsistent()); + ASSERT_EQ(counter.LoadSequentiallyConsistent(), kRecursion); +} + +class TestOrderTask : public HeapTask { + public: + explicit TestOrderTask(uint64_t expected_time, size_t expected_counter, size_t* counter) + : HeapTask(expected_time), expected_counter_(expected_counter), counter_(counter) { + } + virtual void Run(Thread* thread) OVERRIDE { + UNUSED(thread); // Fix cpplint bug.
+ ASSERT_EQ(*counter_, expected_counter_); + ++*counter_; + } + + private: + const size_t expected_counter_; + size_t* const counter_; +}; + +TEST_F(TaskProcessorTest, Ordering) { + static const size_t kNumTasks = 25; + const uint64_t current_time = NanoTime(); + Thread* const self = Thread::Current(); + TaskProcessor task_processor; + task_processor.Stop(self); + size_t counter = 0; + std::vector<std::pair<uint64_t, size_t>> orderings; + for (size_t i = 0; i < kNumTasks; ++i) { + orderings.push_back(std::make_pair(current_time + MsToNs(10U * i), i)); + } + for (size_t i = 0; i < kNumTasks; ++i) { + std::swap(orderings[i], orderings[(i * 87654231 + 12345) % orderings.size()]); + } + for (const auto& pair : orderings) { + auto* task = new TestOrderTask(pair.first, pair.second, &counter); + task_processor.AddTask(self, task); + } + ThreadPool thread_pool("task processor test", 1U); + Atomic<bool> done_running(false); + // Add a task which will wait until interrupted to the thread pool. + thread_pool.AddTask(self, new WorkUntilDoneTask(&task_processor, &done_running)); + ASSERT_FALSE(done_running.LoadSequentiallyConsistent()); + thread_pool.StartWorkers(self); + thread_pool.Wait(self, true, false); + ASSERT_TRUE(done_running.LoadSequentiallyConsistent()); + ASSERT_EQ(counter, kNumTasks); +} + +} // namespace gc +} // namespace art diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc index a348432340..f503b354f7 100644 --- a/runtime/native/dalvik_system_VMRuntime.cc +++ b/runtime/native/dalvik_system_VMRuntime.cc @@ -34,6 +34,7 @@ #include "gc/heap.h" #include "gc/space/dlmalloc_space.h" #include "gc/space/image_space.h" +#include "gc/task_processor.h" #include "intern_table.h" #include "jni_internal.h" #include "mirror/art_method-inl.h" @@ -213,19 +214,32 @@ static void VMRuntime_updateProcessState(JNIEnv*, jobject, jint process_state) { runtime->UpdateProfilerState(process_state); } -static void VMRuntime_trimHeap(JNIEnv*, jobject) { - Runtime::Current()->GetHeap()->DoPendingTransitionOrTrim(); +static void VMRuntime_trimHeap(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->Trim(ThreadForEnv(env)); } static void VMRuntime_concurrentGC(JNIEnv* env, jobject) { Runtime::Current()->GetHeap()->ConcurrentGC(ThreadForEnv(env)); } +static void VMRuntime_requestHeapTrim(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->RequestTrim(ThreadForEnv(env)); +} + static void VMRuntime_requestConcurrentGC(JNIEnv* env, jobject) { - Runtime::Current()->GetHeap()->NotifyConcurrentGCRequest(ThreadForEnv(env)); + Runtime::Current()->GetHeap()->RequestConcurrentGC(ThreadForEnv(env)); } -static void VMRuntime_waitForConcurrentGCRequest(JNIEnv* env, jobject) { - Runtime::Current()->GetHeap()->WaitForConcurrentGCRequest(ThreadForEnv(env)); + +static void VMRuntime_startHeapTaskProcessor(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->GetTaskProcessor()->Start(ThreadForEnv(env)); +} + +static void VMRuntime_stopHeapTaskProcessor(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->GetTaskProcessor()->Stop(ThreadForEnv(env)); +} + +static void VMRuntime_runHeapTasks(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->GetTaskProcessor()->RunAllTasks(ThreadForEnv(env)); } typedef std::map<std::string, mirror::String*> StringTable; @@ -566,8 +580,6 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(VMRuntime, classPath, "()Ljava/lang/String;"), NATIVE_METHOD(VMRuntime, clearGrowthLimit, "()V"), NATIVE_METHOD(VMRuntime, concurrentGC, "()V"), 
- NATIVE_METHOD(VMRuntime, requestConcurrentGC, "()V"), - NATIVE_METHOD(VMRuntime, waitForConcurrentGCRequest, "()V"), NATIVE_METHOD(VMRuntime, disableJitCompilation, "()V"), NATIVE_METHOD(VMRuntime, getTargetHeapUtilization, "()F"), NATIVE_METHOD(VMRuntime, isDebuggerActive, "!()Z"), @@ -578,8 +590,13 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(VMRuntime, setTargetSdkVersionNative, "(I)V"), NATIVE_METHOD(VMRuntime, registerNativeAllocation, "(I)V"), NATIVE_METHOD(VMRuntime, registerNativeFree, "(I)V"), + NATIVE_METHOD(VMRuntime, requestConcurrentGC, "()V"), + NATIVE_METHOD(VMRuntime, requestHeapTrim, "()V"), + NATIVE_METHOD(VMRuntime, runHeapTasks, "()V"), NATIVE_METHOD(VMRuntime, updateProcessState, "(I)V"), + NATIVE_METHOD(VMRuntime, startHeapTaskProcessor, "()V"), NATIVE_METHOD(VMRuntime, startJitCompilation, "()V"), + NATIVE_METHOD(VMRuntime, stopHeapTaskProcessor, "()V"), NATIVE_METHOD(VMRuntime, trimHeap, "()V"), NATIVE_METHOD(VMRuntime, vmVersion, "()Ljava/lang/String;"), NATIVE_METHOD(VMRuntime, vmLibrary, "()Ljava/lang/String;"), diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc index f1a04cb35d..5f68d6000a 100644 --- a/runtime/native/dalvik_system_ZygoteHooks.cc +++ b/runtime/native/dalvik_system_ZygoteHooks.cc @@ -86,9 +86,15 @@ static void EnableDebugFeatures(uint32_t debug_flags) { } debug_flags &= ~DEBUG_ENABLE_DEBUGGER; - // These two are for backwards compatibility with Dalvik. + if ((debug_flags & DEBUG_ENABLE_SAFEMODE) != 0) { + // Ensure that any (secondary) oat files will be interpreted. + Runtime* runtime = Runtime::Current(); + runtime->AddCompilerOption("--compiler-filter=interpret-only"); + debug_flags &= ~DEBUG_ENABLE_SAFEMODE; + } + + // This is for backwards compatibility with Dalvik. debug_flags &= ~DEBUG_ENABLE_ASSERT; - debug_flags &= ~DEBUG_ENABLE_SAFEMODE; if (debug_flags != 0) { LOG(ERROR) << StringPrintf("Unknown bits set in debug_flags: %#x", debug_flags); diff --git a/runtime/runtime.h b/runtime/runtime.h index 39fd910893..e31996338d 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -133,6 +133,10 @@ class Runtime { return compiler_options_; } + void AddCompilerOption(std::string option) { + compiler_options_.push_back(option); + } + const std::vector<std::string>& GetImageCompilerOptions() const { return image_compiler_options_; } diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h index 8c080673f9..79b57afedd 100644 --- a/runtime/thread_pool.h +++ b/runtime/thread_pool.h @@ -36,10 +36,18 @@ class Closure { class Task : public Closure { public: - // Called when references reaches 0. + // Called after Closure::Run has been called. virtual void Finalize() { } }; +class SelfDeletingTask : public Task { + public: + virtual ~SelfDeletingTask() { } + virtual void Finalize() { + delete this; + } +}; + class ThreadPoolWorker { public: static const size_t kDefaultStackSize = 1 * MB; diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt index 5f86f1e047..a55a13743c 100644 --- a/test/800-smali/expected.txt +++ b/test/800-smali/expected.txt @@ -9,4 +9,5 @@ invoke-super abstract BadCaseInOpRegRegReg CmpLong FloatIntConstPassing +b/18718277 Done! 
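To show how the heap task machinery introduced above is meant to be driven end to end, here is a minimal C++ sketch, not part of the patch itself: RunHeapTaskDaemon is a hypothetical helper standing in for the Java-side daemon that calls the new VMRuntime natives, while GetTaskProcessor(), Start(), RunAllTasks() and Stop() are the entry points this change adds.

#include "gc/heap.h"
#include "gc/task_processor.h"
#include "thread.h"

namespace art {

// Hypothetical helper: the loop a heap-task daemon thread would run. The Java daemon reaches
// the same code through the VMRuntime.startHeapTaskProcessor() and VMRuntime.runHeapTasks()
// natives registered above.
void RunHeapTaskDaemon(Thread* self, gc::Heap* heap) {
  gc::TaskProcessor* processor = heap->GetTaskProcessor();
  // Mark the processor as running so GetTask() blocks until a task's target run time arrives
  // instead of returning null on an empty queue.
  processor->Start(self);
  // Runs queued HeapTasks (concurrent GC, collector transitions, heap trims) in target-time
  // order; returns once Stop() has been called and the remaining tasks have been drained.
  processor->RunAllTasks(self);
}

}  // namespace art

At runtime shutdown, the matching VMRuntime.stopHeapTaskProcessor() native calls TaskProcessor::Stop(), which broadcasts on the condition variable so RunAllTasks() finishes the queued work and returns.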
diff --git a/test/800-smali/smali/b_18718277.smali b/test/800-smali/smali/b_18718277.smali new file mode 100644 index 0000000000..b14ad2081e --- /dev/null +++ b/test/800-smali/smali/b_18718277.smali @@ -0,0 +1,29 @@ +.class public LB18718277; + +.super Ljava/lang/Object; + +.method public static helper(I)I + .locals 1 + add-int/lit8 v0, p0, 2 + neg-int v0, v0 + return v0 +.end method + +.method public static getInt()I + .registers 2 + const/4 v1, 3 + invoke-static {v1}, LB18718277;->helper(I)I + move-result v0 + :outer_loop + if-eqz v1, :exit_outer_loop + const/4 v0, 0 + if-eqz v0, :skip_dead_loop + :dead_loop + add-int/2addr v0, v0 + if-gez v0, :dead_loop + :skip_dead_loop + add-int/lit8 v1, v1, -1 + goto :outer_loop + :exit_outer_loop + return v0 +.end method diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java index a2db05135d..70641b2069 100644 --- a/test/800-smali/src/Main.java +++ b/test/800-smali/src/Main.java @@ -65,6 +65,7 @@ public class Main { testCases.add(new TestCase("BadCaseInOpRegRegReg", "BadCaseInOpRegRegReg", "getInt", null, null, 2)); testCases.add(new TestCase("CmpLong", "CmpLong", "run", null, null, 0)); testCases.add(new TestCase("FloatIntConstPassing", "FloatIntConstPassing", "run", null, null, 2)); + testCases.add(new TestCase("b/18718277", "B18718277", "getInt", null, null, 0)); } public void runTests() {