53 files changed, 1708 insertions(+), 479 deletions(-)
diff --git a/Android.mk b/Android.mk index 0eaca7f921..3324458bf5 100644 --- a/Android.mk +++ b/Android.mk @@ -97,6 +97,9 @@ include $(art_build_path)/Android.oat.mk ART_HOST_DEPENDENCIES := $(ART_HOST_EXECUTABLES) $(HOST_OUT_JAVA_LIBRARIES)/core-libart-hostdex.jar ART_HOST_DEPENDENCIES += $(HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) ART_TARGET_DEPENDENCIES := $(ART_TARGET_EXECUTABLES) $(TARGET_OUT_JAVA_LIBRARIES)/core-libart.jar $(TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so +ifdef TARGET_2ND_ARCH +ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so +endif ######################################################################## # test targets diff --git a/build/Android.common.mk b/build/Android.common.mk index 3961219d07..ae54efb061 100644 --- a/build/Android.common.mk +++ b/build/Android.common.mk @@ -147,8 +147,6 @@ ART_TEST_OUT := $(TARGET_OUT_DATA)/art-test 2ND_TARGET_ARCH := $(TARGET_2ND_ARCH) ART_PHONY_TEST_TARGET_SUFFIX := 2ND_ART_PHONY_TEST_TARGET_SUFFIX := -ART_TARGET_BINARY_SUFFIX := -2ND_ART_TARGET_BINARY_SUFFIX := ifdef TARGET_2ND_ARCH art_test_primary_suffix := art_test_secondary_suffix := @@ -156,7 +154,6 @@ ifdef TARGET_2ND_ARCH art_test_primary_suffix := 64 ART_PHONY_TEST_TARGET_SUFFIX := 64 2ND_ART_PHONY_TEST_TARGET_SUFFIX := 32 - ART_TARGET_BINARY_SUFFIX := 64 ART_TARGET_ARCH_32 := $(TARGET_2ND_ARCH) ART_TARGET_ARCH_64 := $(TARGET_ARCH) else diff --git a/compiler/Android.mk b/compiler/Android.mk index e9010c9470..cb9e41a4fb 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -59,8 +59,8 @@ LIBART_COMPILER_SRC_FILES := \ dex/mir_field_info.cc \ dex/mir_method_info.cc \ dex/mir_optimization.cc \ - dex/pass_driver.cc \ dex/bb_optimizations.cc \ + dex/pass_driver_me.cc \ dex/bit_vector_block_iterator.cc \ dex/frontend.cc \ dex/mir_graph.cc \ diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc index abfa7a7eb7..1852f805f4 100644 --- a/compiler/dex/bb_optimizations.cc +++ b/compiler/dex/bb_optimizations.cc @@ -23,7 +23,13 @@ namespace art { /* * Code Layout pass implementation start. */ -bool CodeLayout::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { +bool CodeLayout::Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); cUnit->mir_graph->LayoutBlocks(bb); // No need of repeating, so just return false. return false; @@ -32,13 +38,22 @@ bool CodeLayout::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { /* * SSATransformation pass implementation start. */ -bool SSATransformation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { +bool SSATransformation::Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); cUnit->mir_graph->InsertPhiNodeOperands(bb); // No need of repeating, so just return false. 
return false; } -void SSATransformation::End(CompilationUnit* cUnit) const { +void SSATransformation::End(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); // Verify the dataflow information after the pass. if (cUnit->enable_debug & (1 << kDebugVerifyDataflow)) { cUnit->mir_graph->VerifyDataflow(); @@ -48,7 +63,13 @@ void SSATransformation::End(CompilationUnit* cUnit) const { /* * ConstantPropagation pass implementation start */ -bool ConstantPropagation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { +bool ConstantPropagation::Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); cUnit->mir_graph->DoConstantPropagation(bb); // No need of repeating, so just return false. return false; @@ -57,7 +78,10 @@ bool ConstantPropagation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb /* * MethodUseCount pass implementation start. */ -bool MethodUseCount::Gate(const CompilationUnit* cUnit) const { +bool MethodUseCount::Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); // First initialize the data. cUnit->mir_graph->InitializeMethodUses(); @@ -67,7 +91,13 @@ bool MethodUseCount::Gate(const CompilationUnit* cUnit) const { return res; } -bool MethodUseCount::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { +bool MethodUseCount::Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); cUnit->mir_graph->CountUses(bb); // No need of repeating, so just return false. return false; @@ -76,7 +106,13 @@ bool MethodUseCount::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) con /* * BasicBlock Combine pass implementation start. */ -bool BBCombine::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { +bool BBCombine::Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); cUnit->mir_graph->CombineBlocks(bb); // No need of repeating, so just return false. @@ -86,7 +122,10 @@ bool BBCombine::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { /* * BasicBlock Optimization pass implementation start. 
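
All of the WalkBasicBlocks() overrides converted above follow one unpacking idiom: the generic holder is narrowed to a PassMEDataHolder to recover the CompilationUnit and the current BasicBlock. Reduced to a stand-alone sketch with stand-in types (assert in place of DCHECK, static_cast in place of ART's down_cast):

    #include <cassert>

    struct CompilationUnit { };  // stand-in for ART's CompilationUnit
    struct BasicBlock { };       // stand-in for ART's BasicBlock

    class PassDataHolder { };    // empty base, as in pass.h

    class PassMEDataHolder : public PassDataHolder {
     public:
      CompilationUnit* c_unit = nullptr;  // was WalkBasicBlocks' first argument
      BasicBlock* bb = nullptr;           // was WalkBasicBlocks' second argument
    };

    // Shape of a converted Worker(): unpack, check, act on (c_unit, bb), report "no change".
    bool ExampleWorker(const PassDataHolder* data) {
      assert(data != nullptr);
      const PassMEDataHolder* holder = static_cast<const PassMEDataHolder*>(data);
      CompilationUnit* c_unit = holder->c_unit;
      BasicBlock* bb = holder->bb;
      assert(c_unit != nullptr && bb != nullptr);
      (void)c_unit; (void)bb;  // per-pass work on the current basic block goes here
      return false;            // false: the block did not change, so no repeat is needed
    }
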
*/ -void BBOptimizations::Start(CompilationUnit* cUnit) const { +void BBOptimizations::Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); /* * This pass has a different ordering depEnding on the suppress exception, * so do the pass here for now: diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h index 6d500a56ec..43dcdf4504 100644 --- a/compiler/dex/bb_optimizations.h +++ b/compiler/dex/bb_optimizations.h @@ -18,7 +18,7 @@ #define ART_COMPILER_DEX_BB_OPTIMIZATIONS_H_ #include "compiler_internals.h" -#include "pass.h" +#include "pass_me.h" namespace art { @@ -26,16 +26,22 @@ namespace art { * @class CacheFieldLoweringInfo * @brief Cache the lowering info for fields used by IGET/IPUT/SGET/SPUT insns. */ -class CacheFieldLoweringInfo : public Pass { +class CacheFieldLoweringInfo : public PassME { public: - CacheFieldLoweringInfo() : Pass("CacheFieldLoweringInfo", kNoNodes) { + CacheFieldLoweringInfo() : PassME("CacheFieldLoweringInfo", kNoNodes) { } - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->DoCacheFieldLoweringInfo(); } - bool Gate(const CompilationUnit *cUnit) const { + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); return cUnit->mir_graph->HasFieldAccess(); } }; @@ -44,16 +50,22 @@ class CacheFieldLoweringInfo : public Pass { * @class CacheMethodLoweringInfo * @brief Cache the lowering info for methods called by INVOKEs. */ -class CacheMethodLoweringInfo : public Pass { +class CacheMethodLoweringInfo : public PassME { public: - CacheMethodLoweringInfo() : Pass("CacheMethodLoweringInfo", kNoNodes) { + CacheMethodLoweringInfo() : PassME("CacheMethodLoweringInfo", kNoNodes) { } - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->DoCacheMethodLoweringInfo(); } - bool Gate(const CompilationUnit *cUnit) const { + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); return cUnit->mir_graph->HasInvokes(); } }; @@ -62,26 +74,41 @@ class CacheMethodLoweringInfo : public Pass { * @class CallInlining * @brief Perform method inlining pass. 
*/ -class CallInlining : public Pass { +class CallInlining : public PassME { public: - CallInlining() : Pass("CallInlining") { + CallInlining() : PassME("CallInlining") { } - bool Gate(const CompilationUnit* cUnit) const { + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); return cUnit->mir_graph->InlineCallsGate(); } - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->InlineCallsStart(); } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { + bool Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); cUnit->mir_graph->InlineCalls(bb); // No need of repeating, so just return false. return false; } - void End(CompilationUnit* cUnit) const { + void End(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->InlineCallsEnd(); } }; @@ -90,48 +117,57 @@ class CallInlining : public Pass { * @class CodeLayout * @brief Perform the code layout pass. */ -class CodeLayout : public Pass { +class CodeLayout : public PassME { public: - CodeLayout() : Pass("CodeLayout", "2_post_layout_cfg") { + CodeLayout() : PassME("CodeLayout", "2_post_layout_cfg") { } - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->VerifyDataflow(); } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const; + bool Worker(const PassDataHolder* data) const; }; /** * @class SSATransformation * @brief Perform an SSA representation pass on the CompilationUnit. */ -class SSATransformation : public Pass { +class SSATransformation : public PassME { public: - SSATransformation() : Pass("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") { + SSATransformation() : PassME("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") { } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const; + bool Worker(const PassDataHolder* data) const; - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->InitializeSSATransformation(); } - void End(CompilationUnit* cUnit) const; + void End(const PassDataHolder* data) const; }; /** * @class ConstantPropagation * @brief Perform a constant propagation pass. 
*/ -class ConstantPropagation : public Pass { +class ConstantPropagation : public PassME { public: - ConstantPropagation() : Pass("ConstantPropagation") { + ConstantPropagation() : PassME("ConstantPropagation") { } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const; + bool Worker(const PassDataHolder* data) const; - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->InitializeConstantPropagation(); } }; @@ -140,12 +176,15 @@ class ConstantPropagation : public Pass { * @class InitRegLocations * @brief Initialize Register Locations. */ -class InitRegLocations : public Pass { +class InitRegLocations : public PassME { public: - InitRegLocations() : Pass("InitRegLocation", kNoNodes) { + InitRegLocations() : PassME("InitRegLocation", kNoNodes) { } - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->InitRegLocations(); } }; @@ -154,53 +193,77 @@ class InitRegLocations : public Pass { * @class MethodUseCount * @brief Count the register uses of the method */ -class MethodUseCount : public Pass { +class MethodUseCount : public PassME { public: - MethodUseCount() : Pass("UseCount") { + MethodUseCount() : PassME("UseCount") { } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const; + bool Worker(const PassDataHolder* data) const; - bool Gate(const CompilationUnit* cUnit) const; + bool Gate(const PassDataHolder* data) const; }; /** * @class NullCheckEliminationAndTypeInference * @brief Null check elimination and type inference. 
*/ -class NullCheckEliminationAndTypeInference : public Pass { +class NullCheckEliminationAndTypeInference : public PassME { public: NullCheckEliminationAndTypeInference() - : Pass("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") { + : PassME("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") { } - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->EliminateNullChecksAndInferTypesStart(); } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { + bool Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); return cUnit->mir_graph->EliminateNullChecksAndInferTypes(bb); } - void End(CompilationUnit* cUnit) const { + void End(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->EliminateNullChecksAndInferTypesEnd(); } }; -class ClassInitCheckElimination : public Pass { +class ClassInitCheckElimination : public PassME { public: - ClassInitCheckElimination() : Pass("ClInitCheckElimination", kRepeatingPreOrderDFSTraversal) { + ClassInitCheckElimination() : PassME("ClInitCheckElimination", kRepeatingPreOrderDFSTraversal) { } - bool Gate(const CompilationUnit* cUnit) const { + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); return cUnit->mir_graph->EliminateClassInitChecksGate(); } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { + bool Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); return cUnit->mir_graph->EliminateClassInitChecks(bb); } - void End(CompilationUnit* cUnit) const { + void End(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->EliminateClassInitChecksEnd(); } }; @@ -209,32 +272,38 @@ class ClassInitCheckElimination : public Pass { * @class NullCheckEliminationAndTypeInference * @brief Null check elimination and type inference. 
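
On the header side, every optimization above now derives from PassME, which keeps the traversal mode, optional flags and CFG-dump folder, while the hooks all take a const PassDataHolder*. A new pass written in the same style would look roughly like this (ExamplePass and its dump folder name are illustrative, not part of the patch):

    class ExamplePass : public PassME {
     public:
      ExamplePass() : PassME("ExamplePass", kPreOrderDFSTraversal, "6_post_example_cfg") {
      }

      bool Gate(const PassDataHolder* data) const {
        DCHECK(data != nullptr);
        CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
        DCHECK(cUnit != nullptr);
        return true;  // decide from cUnit whether the pass should run at all
      }

      bool Worker(const PassDataHolder* data) const {
        const PassMEDataHolder* holder = down_cast<const PassMEDataHolder*>(data);
        // Act on holder->c_unit / holder->bb for the current block.
        // Start() and End() can be overridden the same way when needed.
        return false;  // no change, so repeating traversals terminate
      }
    };
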
*/ -class BBCombine : public Pass { +class BBCombine : public PassME { public: - BBCombine() : Pass("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") { + BBCombine() : PassME("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") { } - bool Gate(const CompilationUnit* cUnit) const { + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); return ((cUnit->disable_opt & (1 << kSuppressExceptionEdges)) != 0); } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const; + bool Worker(const PassDataHolder* data) const; }; /** * @class BasicBlock Optimizations * @brief Any simple BasicBlock optimization can be put here. */ -class BBOptimizations : public Pass { +class BBOptimizations : public PassME { public: - BBOptimizations() : Pass("BBOptimizations", kNoNodes, "5_post_bbo_cfg") { + BBOptimizations() : PassME("BBOptimizations", kNoNodes, "5_post_bbo_cfg") { } - bool Gate(const CompilationUnit* cUnit) const { + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); return ((cUnit->disable_opt & (1 << kBBOpt)) == 0); } - void Start(CompilationUnit* cUnit) const; + void Start(const PassDataHolder* data) const; }; } // namespace art diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index 77b5057538..ec2556b206 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -21,7 +21,7 @@ #include "dataflow_iterator-inl.h" #include "leb128.h" #include "mirror/object.h" -#include "pass_driver.h" +#include "pass_driver_me.h" #include "runtime.h" #include "base/logging.h" #include "base/timing_logger.h" @@ -924,7 +924,7 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, } /* Create the pass driver and launch it */ - PassDriver pass_driver(&cu); + PassDriverME pass_driver(&cu); pass_driver.Launch(); if (cu.enable_debug & (1 << kDebugDumpCheckStats)) { diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc index 2b1c4207e8..e56e0160ca 100644 --- a/compiler/dex/local_value_numbering_test.cc +++ b/compiler/dex/local_value_numbering_test.cc @@ -144,7 +144,6 @@ class LocalValueNumberingTest : public testing::Test { mir->ssa_rep->fp_def = nullptr; // Not used by LVN. mir->dalvikInsn.opcode = def->opcode; mir->offset = i; // LVN uses offset only for debug output - mir->width = 1u; // Not used by LVN. 
mir->optimization_flags = 0u; if (i != 0u) { diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index ba4224ea78..4ba66771b4 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -196,7 +196,7 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, } orig_block->last_mir_insn = prev; - prev->next = NULL; + prev->next = nullptr; /* * Update the immediate predecessor block pointer so that outgoing edges @@ -220,6 +220,7 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, while (p != bottom_block->last_mir_insn) { p = p->next; DCHECK(p != nullptr); + p->bb = bottom_block->id; int opcode = p->dalvikInsn.opcode; /* * Some messiness here to ensure that we only enter real opcodes and only the @@ -543,7 +544,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse new_block->start_offset = insn->offset; cur_block->fall_through = new_block->id; new_block->predecessors->Insert(cur_block->id); - MIR* new_insn = static_cast<MIR*>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR)); + MIR* new_insn = NewMIR(); *new_insn = *insn; insn->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheck); @@ -629,11 +630,10 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ /* Parse all instructions and put them into containing basic blocks */ while (code_ptr < code_end) { - MIR *insn = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR)); + MIR *insn = NewMIR(); insn->offset = current_offset_; insn->m_unit_index = current_method_; int width = ParseInsn(code_ptr, &insn->dalvikInsn); - insn->width = width; Instruction::Code opcode = insn->dalvikInsn.opcode; if (opcode_count_ != NULL) { opcode_count_[static_cast<int>(opcode)]++; @@ -924,7 +924,7 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff fclose(file); } -/* Insert an MIR instruction to the end of a basic block */ +/* Insert an MIR instruction to the end of a basic block. */ void BasicBlock::AppendMIR(MIR* mir) { if (first_mir_insn == nullptr) { DCHECK(last_mir_insn == nullptr); @@ -935,9 +935,11 @@ void BasicBlock::AppendMIR(MIR* mir) { mir->next = nullptr; last_mir_insn = mir; } + + mir->bb = id; } -/* Insert an MIR instruction to the head of a basic block */ +/* Insert an MIR instruction to the head of a basic block. */ void BasicBlock::PrependMIR(MIR* mir) { if (first_mir_insn == nullptr) { DCHECK(last_mir_insn == nullptr); @@ -947,17 +949,53 @@ void BasicBlock::PrependMIR(MIR* mir) { mir->next = first_mir_insn; first_mir_insn = mir; } + + mir->bb = id; } -/* Insert a MIR instruction after the specified MIR */ +/* Insert a MIR instruction after the specified MIR. */ void BasicBlock::InsertMIRAfter(MIR* current_mir, MIR* new_mir) { new_mir->next = current_mir->next; current_mir->next = new_mir; if (last_mir_insn == current_mir) { - /* Is the last MIR in the block */ + /* Is the last MIR in the block? */ last_mir_insn = new_mir; } + + new_mir->bb = id; +} + +MIR* BasicBlock::FindPreviousMIR(MIR* mir) { + MIR* current = first_mir_insn; + + while (current != nullptr) { + MIR* next = current->next; + + if (next == mir) { + return current; + } + + current = next; + } + + return nullptr; +} + +void BasicBlock::InsertMIRBefore(MIR* current_mir, MIR* new_mir) { + if (first_mir_insn == current_mir) { + /* Is the first MIR in the block? 
*/ + first_mir_insn = new_mir; + new_mir->bb = id; + } + + MIR* prev = FindPreviousMIR(current_mir); + + if (prev != nullptr) { + prev->next = new_mir; + new_mir->next = current_mir; + new_mir->bb = id; + } } MIR* BasicBlock::GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current) { @@ -1240,6 +1278,12 @@ CallInfo* MIRGraph::NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, return info; } +// Allocate a new MIR. +MIR* MIRGraph::NewMIR() { + MIR* mir = new (arena_) MIR(); + return mir; +} + // Allocate a new basic block. BasicBlock* MIRGraph::NewMemBB(BBType block_type, int block_id) { BasicBlock* bb = static_cast<BasicBlock*>(arena_->Alloc(sizeof(BasicBlock), @@ -1344,4 +1388,106 @@ BasicBlock* ChildBlockIterator::Next() { return nullptr; } +bool BasicBlock::RemoveMIR(MIR* mir) { + if (mir == nullptr) { + return false; + } + + // Find the MIR, and the one before it if they exist. + MIR* current = nullptr; + MIR* prev = nullptr; + + // Find the mir we are looking for. + for (current = first_mir_insn; current != nullptr; prev = current, current = current->next) { + if (current == mir) { + break; + } + } + + // Did we find it? + if (current != nullptr) { + MIR* next = current->next; + + // Just update the links of prev and next and current is almost gone. + if (prev != nullptr) { + prev->next = next; + } + + // Exceptions are if first or last mirs are invoke. + if (first_mir_insn == current) { + first_mir_insn = next; + } + + if (last_mir_insn == current) { + last_mir_insn = prev; + } + + // Found it and removed it. + return true; + } + + // We did not find it. + return false; +} + +MIR* MIR::Copy(MIRGraph* mir_graph) { + MIR* res = mir_graph->NewMIR(); + *res = *this; + + // Remove links + res->next = nullptr; + res->bb = NullBasicBlockId; + res->ssa_rep = nullptr; + + return res; +} + +MIR* MIR::Copy(CompilationUnit* c_unit) { + return Copy(c_unit->mir_graph.get()); +} + +uint32_t SSARepresentation::GetStartUseIndex(Instruction::Code opcode) { + // Default result. + int res = 0; + + // We are basically setting the iputs to their igets counterparts. + switch (opcode) { + case Instruction::IPUT: + case Instruction::IPUT_OBJECT: + case Instruction::IPUT_BOOLEAN: + case Instruction::IPUT_BYTE: + case Instruction::IPUT_CHAR: + case Instruction::IPUT_SHORT: + case Instruction::IPUT_QUICK: + case Instruction::IPUT_OBJECT_QUICK: + case Instruction::APUT: + case Instruction::APUT_OBJECT: + case Instruction::APUT_BOOLEAN: + case Instruction::APUT_BYTE: + case Instruction::APUT_CHAR: + case Instruction::APUT_SHORT: + case Instruction::SPUT: + case Instruction::SPUT_OBJECT: + case Instruction::SPUT_BOOLEAN: + case Instruction::SPUT_BYTE: + case Instruction::SPUT_CHAR: + case Instruction::SPUT_SHORT: + // Skip the VR containing what to store. + res = 1; + break; + case Instruction::IPUT_WIDE: + case Instruction::IPUT_WIDE_QUICK: + case Instruction::APUT_WIDE: + case Instruction::SPUT_WIDE: + // Skip the two VRs containing what to store. + res = 2; + break; + default: + // Do nothing in the general case. 
+ break; + } + + return res; +} + } // namespace art diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 11d2fbe039..0bb82659a2 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -242,6 +242,8 @@ struct SSARepresentation { bool* fp_use; int32_t* defs; bool* fp_def; + + static uint32_t GetStartUseIndex(Instruction::Code opcode); }; /* @@ -261,12 +263,15 @@ struct MIR { uint32_t vC; uint32_t arg[5]; /* vC/D/E/F/G in invoke or filled-new-array */ Instruction::Code opcode; + + explicit DecodedInstruction():vA(0), vB(0), vB_wide(0), vC(0), opcode(Instruction::NOP) { + } } dalvikInsn; - uint16_t width; // Note: width can include switch table or fill array data. NarrowDexOffset offset; // Offset of the instruction in code units. uint16_t optimization_flags; int16_t m_unit_index; // From which method was this MIR included + BasicBlockId bb; MIR* next; SSARepresentation* ssa_rep; union { @@ -285,6 +290,23 @@ struct MIR { // INVOKE data index, points to MIRGraph::method_lowering_infos_. uint32_t method_lowering_info; } meta; + + explicit MIR():offset(0), optimization_flags(0), m_unit_index(0), bb(NullBasicBlockId), + next(nullptr), ssa_rep(nullptr) { + memset(&meta, 0, sizeof(meta)); + } + + uint32_t GetStartUseIndex() const { + return SSARepresentation::GetStartUseIndex(dalvikInsn.opcode); + } + + MIR* Copy(CompilationUnit *c_unit); + MIR* Copy(MIRGraph* mir_Graph); + + static void* operator new(size_t size, ArenaAllocator* arena) { + return arena->Alloc(sizeof(MIR), kArenaAllocMIR); + } + static void operator delete(void* p) {} // Nop. }; struct SuccessorBlockInfo; @@ -319,6 +341,8 @@ struct BasicBlock { void AppendMIR(MIR* mir); void PrependMIR(MIR* mir); void InsertMIRAfter(MIR* current_mir, MIR* new_mir); + void InsertMIRBefore(MIR* current_mir, MIR* new_mir); + MIR* FindPreviousMIR(MIR* mir); /** * @brief Used to obtain the next MIR that follows unconditionally. @@ -329,6 +353,7 @@ struct BasicBlock { * @return Returns the following MIR if one can be found. */ MIR* GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current); + bool RemoveMIR(MIR* mir); }; /* @@ -836,6 +861,7 @@ class MIRGraph { void DumpMIRGraph(); CallInfo* NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, bool is_range); BasicBlock* NewMemBB(BBType block_type, int block_id); + MIR* NewMIR(); MIR* AdvanceMIR(BasicBlock** p_bb, MIR* mir); BasicBlock* NextDominatedBlock(BasicBlock* bb); bool LayoutBlocks(BasicBlock* bb); diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index 891d9fb7ea..86092b6e3d 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -170,7 +170,6 @@ class ClassInitCheckEliminationTest : public testing::Test { } mir->ssa_rep = nullptr; mir->offset = 2 * i; // All insns need to be at least 2 code units long. - mir->width = 2u; mir->optimization_flags = 0u; merged_df_flags |= MIRGraph::GetDataFlowAttributes(def->opcode); } diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h index 9457d5be76..ac22294811 100644 --- a/compiler/dex/pass.h +++ b/compiler/dex/pass.h @@ -19,49 +19,21 @@ #include <string> +#include "base/macros.h" namespace art { -// Forward declarations. -struct BasicBlock; -struct CompilationUnit; -class Pass; - -/** - * @brief OptimizationFlag is an enumeration to perform certain tasks for a given pass. - * @details Each enum should be a power of 2 to be correctly used. 
- */ -enum OptimizationFlag { -}; - -enum DataFlowAnalysisMode { - kAllNodes = 0, /**< @brief All nodes. */ - kPreOrderDFSTraversal, /**< @brief Depth-First-Search / Pre-Order. */ - kRepeatingPreOrderDFSTraversal, /**< @brief Depth-First-Search / Repeating Pre-Order. */ - kReversePostOrderDFSTraversal, /**< @brief Depth-First-Search / Reverse Post-Order. */ - kRepeatingPostOrderDFSTraversal, /**< @brief Depth-First-Search / Repeating Post-Order. */ - kRepeatingReversePostOrderDFSTraversal, /**< @brief Depth-First-Search / Repeating Reverse Post-Order. */ - kPostOrderDOMTraversal, /**< @brief Dominator tree / Post-Order. */ - kNoNodes, /**< @brief Skip BasicBlock traversal. */ +// Empty Pass Data Class, can be extended by any pass extending the base Pass class. +class PassDataHolder { }; /** * @class Pass - * @brief Pass is the Pass structure for the optimizations. - * @details The following structure has the different optimization passes that we are going to do. + * @brief Base Pass class, can be extended to perform a more defined way of doing the work call. */ class Pass { public: - explicit Pass(const char* name, DataFlowAnalysisMode type = kAllNodes, - unsigned int flags = 0u, const char* dump = "") - : pass_name_(name), traversal_type_(type), flags_(flags), dump_cfg_folder_(dump) { - } - - Pass(const char* name, DataFlowAnalysisMode type, const char* dump) - : pass_name_(name), traversal_type_(type), flags_(0), dump_cfg_folder_(dump) { - } - - Pass(const char* name, const char* dump) - : pass_name_(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) { + explicit Pass(const char* name) + : pass_name_(name) { } virtual ~Pass() { @@ -71,59 +43,42 @@ class Pass { return pass_name_; } - virtual DataFlowAnalysisMode GetTraversal() const { - return traversal_type_; - } - - virtual bool GetFlag(OptimizationFlag flag) const { - return (flags_ & flag); - } - - const char* GetDumpCFGFolder() const { - return dump_cfg_folder_; - } - /** * @brief Gate for the pass: determines whether to execute the pass or not considering a CompilationUnit - * @param c_unit the CompilationUnit. - * @return whether or not to execute the pass + * @param data the PassDataHolder. + * @return whether or not to execute the pass. */ - virtual bool Gate(const CompilationUnit* c_unit) const { + virtual bool Gate(const PassDataHolder* data) const { // Unused parameter. - UNUSED(c_unit); + UNUSED(data); // Base class says yes. return true; } /** - * @brief Start of the pass: called before the WalkBasicBlocks function - * @param c_unit the considered CompilationUnit. + * @brief Start of the pass: called before the Worker function. */ - virtual void Start(CompilationUnit* c_unit) const { + virtual void Start(const PassDataHolder* data) const { // Unused parameter. - UNUSED(c_unit); + UNUSED(data); } /** - * @brief End of the pass: called after the WalkBasicBlocks function - * @param c_unit the considered CompilationUnit. + * @brief End of the pass: called after the WalkBasicBlocks function. */ - virtual void End(CompilationUnit* c_unit) const { + virtual void End(const PassDataHolder* data) const { // Unused parameter. - UNUSED(c_unit); + UNUSED(data); } /** - * @brief Actually walk the BasicBlocks following a particular traversal type. - * @param c_unit the CompilationUnit. - * @param bb the BasicBlock. + * @param data the object containing data necessary for the pass. 
* @return whether or not there is a change when walking the BasicBlock */ - virtual bool WalkBasicBlocks(CompilationUnit* c_unit, BasicBlock* bb) const { - // Unused parameters. - UNUSED(c_unit); - UNUSED(bb); + virtual bool Worker(const PassDataHolder* data) const { + // Unused parameter. + UNUSED(data); // BasicBlock did not change. return false; @@ -133,15 +88,6 @@ class Pass { /** @brief The pass name: used for searching for a pass when running a particular pass or debugging. */ const char* const pass_name_; - /** @brief Type of traversal: determines the order to execute the pass on the BasicBlocks. */ - const DataFlowAnalysisMode traversal_type_; - - /** @brief Flags for additional directives: used to determine if a particular clean-up is necessary post pass. */ - const unsigned int flags_; - - /** @brief CFG Dump Folder: what sub-folder to use for dumping the CFGs post pass. */ - const char* const dump_cfg_folder_; - private: // In order to make the all passes not copy-friendly. DISALLOW_COPY_AND_ASSIGN(Pass); diff --git a/compiler/dex/pass_driver.h b/compiler/dex/pass_driver.h index 2b7196e187..aa0d1ae462 100644 --- a/compiler/dex/pass_driver.h +++ b/compiler/dex/pass_driver.h @@ -22,77 +22,169 @@ #include "safe_map.h" // Forward Declarations. -class CompilationUnit; class Pass; - +class PassDriver; namespace art { +/** + * @brief Helper function to create a single instance of a given Pass and can be shared across + * the threads. + */ +template <typename PassType> +const Pass* GetPassInstance() { + static const PassType pass; + return &pass; +} + +// Empty holder for the constructor. +class PassDriverDataHolder { +}; /** * @class PassDriver - * @brief PassDriver is the wrapper around all Pass instances in order to execute them from the Middle-End + * @brief PassDriver is the wrapper around all Pass instances in order to execute them */ +template <typename PassDriverType> class PassDriver { public: - explicit PassDriver(CompilationUnit* cu, bool create_default_passes = true); + explicit PassDriver() { + InitializePasses(); + } - ~PassDriver(); + virtual ~PassDriver() { + } /** * @brief Insert a Pass: can warn if multiple passes have the same name. - * @param new_pass the new Pass to insert in the map and list. - * @param warn_override warn if the name of the Pass is already used. */ - void InsertPass(const Pass* new_pass); + void InsertPass(const Pass* new_pass) { + DCHECK(new_pass != nullptr); + DCHECK(new_pass->GetName() != nullptr && new_pass->GetName()[0] != 0); + + // It is an error to override an existing pass. + DCHECK(GetPass(new_pass->GetName()) == nullptr) + << "Pass name " << new_pass->GetName() << " already used."; + + // Now add to the list. + pass_list_.push_back(new_pass); + } /** * @brief Run a pass using the name as key. - * @param c_unit the considered CompilationUnit. - * @param pass_name the Pass name. * @return whether the pass was applied. */ - bool RunPass(CompilationUnit* c_unit, const char* pass_name); + virtual bool RunPass(const char* pass_name) { + // Paranoid: c_unit cannot be nullptr and we need a pass name. + DCHECK(pass_name != nullptr && pass_name[0] != 0); + + const Pass* cur_pass = GetPass(pass_name); + + if (cur_pass != nullptr) { + return RunPass(cur_pass); + } + + // Return false, we did not find the pass. + return false; + } /** - * @brief Run a pass using the Pass itself. - * @param time_split do we want a time split request(default: false)? - * @return whether the pass was applied. + * @brief Runs all the passes with the pass_list_. 
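
GetPassInstance<PassType>() above hands out a single const instance per pass type via a function-local static, which is why one pass table can be shared across compiler threads: the passes carry no per-compilation state, and every hook only sees the per-driver data holder. With thread-safe local statics (the default in GCC/Clang), concurrent first calls still construct exactly one object:

    const Pass* a = GetPassInstance<CodeLayout>();
    const Pass* b = GetPassInstance<CodeLayout>();
    DCHECK(a == b);  // same shared static instance
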
*/ - bool RunPass(CompilationUnit* c_unit, const Pass* pass, bool time_split = false); + void Launch() { + for (const Pass* cur_pass : pass_list_) { + RunPass(cur_pass); + } + } - void Launch(); + /** + * @brief Searches for a particular pass. + * @param the name of the pass to be searched for. + */ + const Pass* GetPass(const char* name) const { + for (const Pass* cur_pass : pass_list_) { + if (strcmp(name, cur_pass->GetName()) == 0) { + return cur_pass; + } + } + return nullptr; + } - void HandlePassFlag(CompilationUnit* c_unit, const Pass* pass); + static void CreateDefaultPassList(const std::string& disable_passes) { + // Insert each pass from g_passes into g_default_pass_list. + PassDriverType::g_default_pass_list.clear(); + PassDriverType::g_default_pass_list.reserve(PassDriver<PassDriverType>::g_passes_size); + for (uint16_t i = 0; i < PassDriver<PassDriverType>::g_passes_size; ++i) { + const Pass* pass = PassDriver<PassDriverType>::g_passes[i]; + // Check if we should disable this pass. + if (disable_passes.find(pass->GetName()) != std::string::npos) { + LOG(INFO) << "Skipping " << pass->GetName(); + } else { + PassDriver<PassDriverType>::g_default_pass_list.push_back(pass); + } + } + } /** - * @brief Apply a patch: perform start/work/end functions. + * @brief Run a pass using the Pass itself. + * @param time_split do we want a time split request(default: false)? + * @return whether the pass was applied. */ - void ApplyPass(CompilationUnit* c_unit, const Pass* pass); + virtual bool RunPass(const Pass* pass, bool time_split = false) = 0; /** - * @brief Dispatch a patch: walk the BasicBlocks depending on the traversal mode + * @brief Print the pass names of all the passes available. */ - void DispatchPass(CompilationUnit* c_unit, const Pass* pass); + static void PrintPassNames() { + LOG(INFO) << "Loop Passes are:"; - static void PrintPassNames(); - static void CreateDefaultPassList(const std::string& disable_passes); + for (const Pass* cur_pass : PassDriver<PassDriverType>::g_default_pass_list) { + LOG(INFO) << "\t-" << cur_pass->GetName(); + } + } - const Pass* GetPass(const char* name) const; + protected: + /** + * @brief Gets the list of passes currently schedule to execute. + * @return pass_list_ + */ + std::vector<const Pass*>& GetPasses() { + return pass_list_; + } - const char* GetDumpCFGFolder() const { - return dump_cfg_folder_; + virtual void InitializePasses() { + SetDefaultPasses(); } - protected: - void CreatePasses(); + void SetDefaultPasses() { + pass_list_ = PassDriver<PassDriverType>::g_default_pass_list; + } + + /** + * @brief Apply a patch: perform start/work/end functions. + */ + virtual void ApplyPass(PassDataHolder* data, const Pass* pass) { + pass->Start(data); + DispatchPass(pass); + pass->End(data); + } + /** + * @brief Dispatch a patch. + * Gives the ability to add logic when running the patch. + */ + virtual void DispatchPass(const Pass* pass) { + UNUSED(pass); + } /** @brief List of passes: provides the order to execute the passes. */ std::vector<const Pass*> pass_list_; - /** @brief The CompilationUnit on which to execute the passes on. */ - CompilationUnit* const cu_; + /** @brief The number of passes within g_passes. */ + static const uint16_t g_passes_size; + + /** @brief The number of passes within g_passes. */ + static const Pass* const g_passes[]; - /** @brief Dump CFG base folder: where is the base folder for dumping CFGs. */ - const char* dump_cfg_folder_; + /** @brief The default pass list is used to initialize pass_list_. 
*/ + static std::vector<const Pass*> g_default_pass_list; }; } // namespace art diff --git a/compiler/dex/pass_driver_me.cc b/compiler/dex/pass_driver_me.cc new file mode 100644 index 0000000000..d0545004f7 --- /dev/null +++ b/compiler/dex/pass_driver_me.cc @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "base/macros.h" +#include "bb_optimizations.h" +#include "compiler_internals.h" +#include "dataflow_iterator.h" +#include "dataflow_iterator-inl.h" +#include "pass_driver_me.h" + +namespace art { + +namespace { // anonymous namespace + +void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass, DataflowIterator* iterator) { + // Paranoid: Check the iterator before walking the BasicBlocks. + DCHECK(iterator != nullptr); + bool change = false; + for (BasicBlock *bb = iterator->Next(change); bb != 0; bb = iterator->Next(change)) { + data->bb = bb; + change = pass->Worker(data); + } +} + +template <typename Iterator> +inline void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass) { + DCHECK(data != nullptr); + CompilationUnit* c_unit = data->c_unit; + DCHECK(c_unit != nullptr); + Iterator iterator(c_unit->mir_graph.get()); + DoWalkBasicBlocks(data, pass, &iterator); +} +} // anonymous namespace + +/* + * Create the pass list. These passes are immutable and are shared across the threads. + * + * Advantage is that there will be no race conditions here. + * Disadvantage is the passes can't change their internal states depending on CompilationUnit: + * - This is not yet an issue: no current pass would require it. + */ +// The initial list of passes to be used by the PassDriveME. +template<> +const Pass* const PassDriver<PassDriverME>::g_passes[] = { + GetPassInstance<CacheFieldLoweringInfo>(), + GetPassInstance<CacheMethodLoweringInfo>(), + GetPassInstance<CallInlining>(), + GetPassInstance<CodeLayout>(), + GetPassInstance<SSATransformation>(), + GetPassInstance<ConstantPropagation>(), + GetPassInstance<InitRegLocations>(), + GetPassInstance<MethodUseCount>(), + GetPassInstance<NullCheckEliminationAndTypeInference>(), + GetPassInstance<ClassInitCheckElimination>(), + GetPassInstance<BBCombine>(), + GetPassInstance<BBOptimizations>(), +}; + +// The number of the passes in the initial list of Passes (g_passes). +template<> +uint16_t const PassDriver<PassDriverME>::g_passes_size = arraysize(PassDriver<PassDriverME>::g_passes); + +// The default pass list is used by the PassDriverME instance of PassDriver to initialize pass_list_. 
+template<> +std::vector<const Pass*> PassDriver<PassDriverME>::g_default_pass_list(PassDriver<PassDriverME>::g_passes, PassDriver<PassDriverME>::g_passes + PassDriver<PassDriverME>::g_passes_size); + +PassDriverME::PassDriverME(CompilationUnit* cu) + : PassDriver(), pass_me_data_holder_(), dump_cfg_folder_("/sdcard/") { + pass_me_data_holder_.bb = nullptr; + pass_me_data_holder_.c_unit = cu; +} + +PassDriverME::~PassDriverME() { +} + +void PassDriverME::DispatchPass(const Pass* pass) { + VLOG(compiler) << "Dispatching " << pass->GetName(); + const PassME* me_pass = down_cast<const PassME*>(pass); + + DataFlowAnalysisMode mode = me_pass->GetTraversal(); + + switch (mode) { + case kPreOrderDFSTraversal: + DoWalkBasicBlocks<PreOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kRepeatingPreOrderDFSTraversal: + DoWalkBasicBlocks<RepeatingPreOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kRepeatingPostOrderDFSTraversal: + DoWalkBasicBlocks<RepeatingPostOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kReversePostOrderDFSTraversal: + DoWalkBasicBlocks<ReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kRepeatingReversePostOrderDFSTraversal: + DoWalkBasicBlocks<RepeatingReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kPostOrderDOMTraversal: + DoWalkBasicBlocks<PostOrderDOMIterator>(&pass_me_data_holder_, me_pass); + break; + case kAllNodes: + DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass); + break; + case kNoNodes: + break; + default: + LOG(FATAL) << "Iterator mode not handled in dispatcher: " << mode; + break; + } +} + +bool PassDriverME::RunPass(const Pass* pass, bool time_split) { + // Paranoid: c_unit and pass cannot be nullptr, and the pass should have a name + DCHECK(pass != nullptr); + DCHECK(pass->GetName() != nullptr && pass->GetName()[0] != 0); + CompilationUnit* c_unit = pass_me_data_holder_.c_unit; + DCHECK(c_unit != nullptr); + + // Do we perform a time split + if (time_split) { + c_unit->NewTimingSplit(pass->GetName()); + } + + // Check the pass gate first. + bool should_apply_pass = pass->Gate(&pass_me_data_holder_); + if (should_apply_pass) { + // Applying the pass: first start, doWork, and end calls. + ApplyPass(&pass_me_data_holder_, pass); + + // Do we want to log it? + if ((c_unit->enable_debug& (1 << kDebugDumpCFG)) != 0) { + // Do we have a pass folder? + const PassME* me_pass = (down_cast<const PassME*>(pass)); + const char* passFolder = me_pass->GetDumpCFGFolder(); + DCHECK(passFolder != nullptr); + + if (passFolder[0] != 0) { + // Create directory prefix. + std::string prefix = GetDumpCFGFolder(); + prefix += passFolder; + prefix += "/"; + + c_unit->mir_graph->DumpCFG(prefix.c_str(), false); + } + } + } + + // If the pass gate passed, we can declare success. + return should_apply_pass; +} + +const char* PassDriverME::GetDumpCFGFolder() const { + return dump_cfg_folder_; +} + + +} // namespace art diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h new file mode 100644 index 0000000000..0142934be2 --- /dev/null +++ b/compiler/dex/pass_driver_me.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
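
The pass table itself is declared as static members of the PassDriver template (g_passes, g_passes_size, g_default_pass_list) and defined only for the PassDriver<PassDriverME> specialization in pass_driver_me.cc above, so each concrete driver ships its own immutable list while the template supplies InsertPass/Launch/GetPass/CreateDefaultPassList. The same shape in a minimal stand-alone form (toy Pass and Driver, not the ART classes):

    #include <cstdint>
    #include <vector>

    struct Pass { const char* name; };

    template <typename DriverType>
    class Driver {
     public:
      // Declarations only: every concrete driver supplies its own table.
      static const Pass* const g_passes[];
      static const uint16_t g_passes_size;
      static std::vector<const Pass*> g_default_pass_list;
    };

    class MyDriver : public Driver<MyDriver> {};

    // Definitions for this one specialization (what pass_driver_me.cc does for PassDriverME):
    static const Pass kLayout{"Layout"};
    static const Pass kInline{"Inline"};

    template <>
    const Pass* const Driver<MyDriver>::g_passes[] = { &kLayout, &kInline };

    template <>
    const uint16_t Driver<MyDriver>::g_passes_size = 2;

    template <>
    std::vector<const Pass*> Driver<MyDriver>::g_default_pass_list(
        Driver<MyDriver>::g_passes,
        Driver<MyDriver>::g_passes + Driver<MyDriver>::g_passes_size);
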
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_PASS_DRIVER_ME_H_ +#define ART_COMPILER_DEX_PASS_DRIVER_ME_H_ + +#include "bb_optimizations.h" +#include "pass_driver.h" +#include "pass_me.h" + +namespace art { + +class PassDriverME: public PassDriver<PassDriverME> { + public: + explicit PassDriverME(CompilationUnit* cu); + ~PassDriverME(); + /** + * @brief Dispatch a patch: walk the BasicBlocks depending on the traversal mode + */ + void DispatchPass(const Pass* pass); + bool RunPass(const Pass* pass, bool time_split = false); + const char* GetDumpCFGFolder() const; + protected: + /** @brief The data holder that contains data needed for the PassDriverME. */ + PassMEDataHolder pass_me_data_holder_; + + /** @brief Dump CFG base folder: where is the base folder for dumping CFGs. */ + const char* dump_cfg_folder_; +}; + +} // namespace art +#endif // ART_COMPILER_DEX_PASS_DRIVER_ME_H_ diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h new file mode 100644 index 0000000000..1132166a34 --- /dev/null +++ b/compiler/dex/pass_me.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_PASS_ME_H_ +#define ART_COMPILER_DEX_PASS_ME_H_ + +#include <string> +#include "pass.h" + +namespace art { + +// Forward declarations. +struct BasicBlock; +struct CompilationUnit; +class Pass; + +/** + * @brief OptimizationFlag is an enumeration to perform certain tasks for a given pass. + * @details Each enum should be a power of 2 to be correctly used. + */ +enum OptimizationFlag { +}; + +// Data holder class. +class PassMEDataHolder: public PassDataHolder { + public: + CompilationUnit* c_unit; + BasicBlock* bb; +}; + +enum DataFlowAnalysisMode { + kAllNodes = 0, /**< @brief All nodes. */ + kPreOrderDFSTraversal, /**< @brief Depth-First-Search / Pre-Order. */ + kRepeatingPreOrderDFSTraversal, /**< @brief Depth-First-Search / Repeating Pre-Order. */ + kReversePostOrderDFSTraversal, /**< @brief Depth-First-Search / Reverse Post-Order. */ + kRepeatingPostOrderDFSTraversal, /**< @brief Depth-First-Search / Repeating Post-Order. */ + kRepeatingReversePostOrderDFSTraversal, /**< @brief Depth-First-Search / Repeating Reverse Post-Order. */ + kPostOrderDOMTraversal, /**< @brief Dominator tree / Post-Order. */ + kNoNodes, /**< @brief Skip BasicBlock traversal. */ +}; + +/** + * @class Pass + * @brief Pass is the Pass structure for the optimizations. + * @details The following structure has the different optimization passes that we are going to do. 
+ */ +class PassME: public Pass { + public: + explicit PassME(const char* name, DataFlowAnalysisMode type = kAllNodes, + unsigned int flags = 0u, const char* dump = "") + : Pass(name), traversal_type_(type), flags_(flags), dump_cfg_folder_(dump) { + } + + PassME(const char* name, DataFlowAnalysisMode type, const char* dump) + : Pass(name), traversal_type_(type), flags_(0), dump_cfg_folder_(dump) { + } + + PassME(const char* name, const char* dump) + : Pass(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) { + } + + ~PassME() { + } + + virtual DataFlowAnalysisMode GetTraversal() const { + return traversal_type_; + } + + const char* GetDumpCFGFolder() const { + return dump_cfg_folder_; + } + + bool GetFlag(OptimizationFlag flag) const { + return (flags_ & flag); + } + + protected: + /** @brief Type of traversal: determines the order to execute the pass on the BasicBlocks. */ + const DataFlowAnalysisMode traversal_type_; + + /** @brief Flags for additional directives: used to determine if a particular clean-up is necessary post pass. */ + const unsigned int flags_; + + /** @brief CFG Dump Folder: what sub-folder to use for dumping the CFGs post pass. */ + const char* const dump_cfg_folder_; +}; +} // namespace art +#endif // ART_COMPILER_DEX_PASS_ME_H_ diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 6ccf252a14..256135df71 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -364,6 +364,18 @@ LIR* Mir2Lir::ScanLiteralPoolWide(LIR* data_target, int val_lo, int val_hi) { return NULL; } +/* Search the existing constants in the literal pool for an exact method match */ +LIR* Mir2Lir::ScanLiteralPoolMethod(LIR* data_target, const MethodReference& method) { + while (data_target) { + if (static_cast<uint32_t>(data_target->operands[0]) == method.dex_method_index && + UnwrapPointer(data_target->operands[1]) == method.dex_file) { + return data_target; + } + data_target = data_target->next; + } + return nullptr; +} + /* * The following are building blocks to insert constants into the pool or * instruction streams. @@ -1143,11 +1155,13 @@ void Mir2Lir::AddSlowPath(LIRSlowPath* slowpath) { void Mir2Lir::LoadCodeAddress(const MethodReference& target_method, InvokeType type, SpecialTargetRegister symbolic_reg) { - int target_method_idx = target_method.dex_method_index; - LIR* data_target = ScanLiteralPool(code_literal_list_, target_method_idx, 0); + LIR* data_target = ScanLiteralPoolMethod(code_literal_list_, target_method); if (data_target == NULL) { - data_target = AddWordData(&code_literal_list_, target_method_idx); + data_target = AddWordData(&code_literal_list_, target_method.dex_method_index); data_target->operands[1] = WrapPointer(const_cast<DexFile*>(target_method.dex_file)); + // NOTE: The invoke type doesn't contribute to the literal identity. In fact, we can have + // the same method invoked with kVirtual, kSuper and kInterface but the class linker will + // resolve these invokes to the same method, so we don't care which one we record here. 
data_target->operands[2] = type; } LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target); @@ -1157,11 +1171,13 @@ void Mir2Lir::LoadCodeAddress(const MethodReference& target_method, InvokeType t void Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type, SpecialTargetRegister symbolic_reg) { - int target_method_idx = target_method.dex_method_index; - LIR* data_target = ScanLiteralPool(method_literal_list_, target_method_idx, 0); + LIR* data_target = ScanLiteralPoolMethod(method_literal_list_, target_method); if (data_target == NULL) { - data_target = AddWordData(&method_literal_list_, target_method_idx); + data_target = AddWordData(&method_literal_list_, target_method.dex_method_index); data_target->operands[1] = WrapPointer(const_cast<DexFile*>(target_method.dex_file)); + // NOTE: The invoke type doesn't contribute to the literal identity. In fact, we can have + // the same method invoked with kVirtual, kSuper and kInterface but the class linker will + // resolve these invokes to the same method, so we don't care which one we record here. data_target->operands[2] = type; } LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target); diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 3ec31ba7d9..526c981ae9 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -35,15 +35,9 @@ namespace art { namespace { // anonymous namespace MIR* AllocReplacementMIR(MIRGraph* mir_graph, MIR* invoke, MIR* move_return) { - ArenaAllocator* arena = mir_graph->GetArena(); - MIR* insn = static_cast<MIR*>(arena->Alloc(sizeof(MIR), kArenaAllocMIR)); + MIR* insn = mir_graph->NewMIR(); insn->offset = invoke->offset; - insn->width = invoke->width; insn->optimization_flags = MIR_CALLEE; - if (move_return != nullptr) { - DCHECK_EQ(move_return->offset, invoke->offset + invoke->width); - insn->width += move_return->width; - } return insn; } @@ -660,7 +654,6 @@ bool DexFileMethodInliner::GenInlineIGet(MIRGraph* mir_graph, BasicBlock* bb, MI } MIR* insn = AllocReplacementMIR(mir_graph, invoke, move_result); - insn->width += insn->offset - invoke->offset; insn->offset = invoke->offset; insn->dalvikInsn.opcode = opcode; insn->dalvikInsn.vA = move_result->dalvikInsn.vA; @@ -737,9 +730,7 @@ bool DexFileMethodInliner::GenInlineIPut(MIRGraph* mir_graph, BasicBlock* bb, MI if (move_result != nullptr) { MIR* move = AllocReplacementMIR(mir_graph, invoke, move_result); - insn->width = invoke->width; move->offset = move_result->offset; - move->width = move_result->width; if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT) { move->dalvikInsn.opcode = Instruction::MOVE_FROM16; } else if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) { diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 3e0ba7517a..3584c33291 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -617,6 +617,7 @@ class Mir2Lir : public Backend { LIR* NewLIR5(int opcode, int dest, int src1, int src2, int info1, int info2); LIR* ScanLiteralPool(LIR* data_target, int value, unsigned int delta); LIR* ScanLiteralPoolWide(LIR* data_target, int val_lo, int val_hi); + LIR* ScanLiteralPoolMethod(LIR* data_target, const MethodReference& method); LIR* AddWordData(LIR* *constant_list_p, int value); LIR* AddWideData(LIR* *constant_list_p, int val_lo, int val_hi); void ProcessSwitchTables(); diff --git 
a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc index 5aa093a494..865311b084 100644 --- a/compiler/dex/ssa_transformation.cc +++ b/compiler/dex/ssa_transformation.cc @@ -557,8 +557,7 @@ void MIRGraph::InsertPhiNodes() { if (!phi_bb->data_flow_info->live_in_v->IsBitSet(dalvik_reg)) { continue; } - MIR *phi = - static_cast<MIR*>(arena_->Alloc(sizeof(MIR), kArenaAllocDFInfo)); + MIR *phi = NewMIR(); phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi); phi->dalvikInsn.vA = dalvik_reg; phi->offset = phi_bb->start_offset; diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index eb62f1b577..0f41d2b2f6 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1152,28 +1152,22 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType *type = sharp_type; } } else { - if (compiling_boot) { + bool method_in_image = compiling_boot || + Runtime::Current()->GetHeap()->FindSpaceFromObject(method, false)->IsImageSpace(); + if (method_in_image) { + CHECK(!method->IsAbstract()); *type = sharp_type; - *direct_method = -1; - *direct_code = -1; + *direct_method = compiling_boot ? -1 : reinterpret_cast<uintptr_t>(method); + *direct_code = compiling_boot ? -1 : compiler_->GetEntryPointOf(method); + target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile(); + target_method->dex_method_index = method->GetDexMethodIndex(); + } else if (!must_use_direct_pointers) { + // Set the code and rely on the dex cache for the method. + *type = sharp_type; + *direct_code = compiler_->GetEntryPointOf(method); } else { - bool method_in_image = - Runtime::Current()->GetHeap()->FindSpaceFromObject(method, false)->IsImageSpace(); - if (method_in_image) { - CHECK(!method->IsAbstract()); - *type = sharp_type; - *direct_method = reinterpret_cast<uintptr_t>(method); - *direct_code = compiler_->GetEntryPointOf(method); - target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile(); - target_method->dex_method_index = method->GetDexMethodIndex(); - } else if (!must_use_direct_pointers) { - // Set the code and rely on the dex cache for the method. - *type = sharp_type; - *direct_code = compiler_->GetEntryPointOf(method); - } else { - // Direct pointers were required but none were available. - VLOG(compiler) << "Dex cache devirtualization failed for: " << PrettyMethod(method); - } + // Direct pointers were required but none were available. 
+ VLOG(compiler) << "Dex cache devirtualization failed for: " << PrettyMethod(method); } } } @@ -1369,7 +1363,7 @@ class ParallelCompilationManager { self->AssertNoPendingException(); CHECK_GT(work_units, 0U); - index_ = begin; + index_.StoreRelaxed(begin); for (size_t i = 0; i < work_units; ++i) { thread_pool_->AddTask(self, new ForAllClosure(this, end, callback)); } @@ -1384,7 +1378,7 @@ class ParallelCompilationManager { } size_t NextIndex() { - return index_.FetchAndAdd(1); + return index_.FetchAndAddSequentiallyConsistent(1); } private: diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 9914875d5e..f0b575041d 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -33,7 +33,7 @@ #include "compiler.h" #include "compiler_callbacks.h" #include "dex_file-inl.h" -#include "dex/pass_driver.h" +#include "dex/pass_driver_me.h" #include "dex/verification_results.h" #include "driver/compiler_callbacks_impl.h" #include "driver/compiler_driver.h" @@ -918,10 +918,10 @@ static int dex2oat(int argc, char** argv) { } else if (option == "--no-profile-file") { // No profile } else if (option == "--print-pass-names") { - PassDriver::PrintPassNames(); + PassDriverME::PrintPassNames(); } else if (option.starts_with("--disable-passes=")) { std::string disable_passes = option.substr(strlen("--disable-passes=")).data(); - PassDriver::CreateDefaultPassList(disable_passes); + PassDriverME::CreateDefaultPassList(disable_passes); } else { Usage("Unknown argument %s", option.data()); } diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 346b08c741..ac922ddecd 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -508,26 +508,42 @@ NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFr ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode /* - * TODO arm64 specifics need to be fleshed out. * All generated callsites for interface invokes and invocation slow paths will load arguments - * as usual - except instead of loading x0 with the target Method*, x0 will contain - * the method_idx. This wrapper will save x1-x3, load the caller's Method*, align the + * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain + * the method_idx. This wrapper will save arg1-arg3, load the caller's Method*, align the * stack and call the appropriate C helper. - * NOTE: "this" is first visible argument of the target, and so can be found in x1. + * NOTE: "this" is first visible argument of the target, and so can be found in arg1/x1. * - * The helper will attempt to locate the target and return a result in x0 consisting + * The helper will attempt to locate the target and return a 128-bit result in x0/x1 consisting * of the target Method* in x0 and method->code_ in x1. * - * If unsuccessful, the helper will return NULL/NULL. There will be a pending exception in the + * If unsuccessful, the helper will return NULL/????. There will be a pending exception in the * thread and we branch to another stub to deliver it. * * On success this wrapper will restore arguments and *jump* to the target, leaving the lr * pointing back to the original caller. + * + * Adapted from ARM32 code. + * + * Clobbers x12. 
*/ .macro INVOKE_TRAMPOLINE c_name, cxx_name .extern \cxx_name ENTRY \c_name - brk 0 + SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME // save callee saves in case allocation triggers GC + // Helper signature is always + // (method_idx, *this_object, *caller_method, *self, sp) + + ldr x2, [sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE] // pass caller Method* + mov x3, xSELF // pass Thread::Current + mov x4, sp + bl \cxx_name // (method_idx, this, caller, Thread*, SP) + mov x12, x1 // save Method*->code_ + RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME + cbz x0, 1f // did we find the target? if not go to exception delivery + br x12 // tail call to target +1: + DELIVER_PENDING_EXCEPTION END \c_name .endm @@ -1381,8 +1397,17 @@ ENTRY art_quick_proxy_invoke_handler DELIVER_PENDING_EXCEPTION END art_quick_proxy_invoke_handler -UNIMPLEMENTED art_quick_imt_conflict_trampoline - + /* + * Called to resolve an imt conflict. x12 is a hidden argument that holds the target method's + * dex method index. + */ +ENTRY art_quick_imt_conflict_trampoline + ldr x0, [sp, #0] // load caller Method* + ldr w0, [x0, #METHOD_DEX_CACHE_METHODS_OFFSET] // load dex_cache_resolved_methods + add x0, x0, #OBJECT_ARRAY_DATA_OFFSET // get starting address of data + ldr w0, [x0, x12, lsl 2] // load the target method + b art_quick_invoke_interface_trampoline +END art_quick_imt_conflict_trampoline ENTRY art_quick_resolution_trampoline SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index b22ca82473..fac988310a 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -16,6 +16,8 @@ #include "common_runtime_test.h" #include "mirror/art_field-inl.h" +#include "mirror/art_method-inl.h" +#include "mirror/class-inl.h" #include "mirror/string-inl.h" #include <cstdio> @@ -50,6 +52,7 @@ class StubTest : public CommonRuntimeTest { pair.first = "-Xmx4M"; // Smallest we can go. } } + options->push_back(std::make_pair("-Xint", nullptr)); } // Helper function needed since TEST_F makes a new class. @@ -283,6 +286,234 @@ class StubTest : public CommonRuntimeTest { return result; } + // TODO: Set up a frame according to referrer's specs. + size_t Invoke3WithReferrerAndHidden(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, + Thread* self, mirror::ArtMethod* referrer, size_t hidden) { + // Push a transition back into managed code onto the linked list in thread. + ManagedStack fragment; + self->PushManagedStackFragment(&fragment); + + size_t result; + size_t fpr_result = 0; +#if defined(__i386__) + // TODO: Set the thread? + __asm__ __volatile__( + "movd %[hidden], %%xmm0\n\t" + "pushl %[referrer]\n\t" // Store referrer + "call *%%edi\n\t" // Call the stub + "addl $4, %%esp" // Pop referrer + : "=a" (result) + // Use the result from eax + : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"m"(referrer), [hidden]"r"(hidden) + // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx + : ); // clobber. + // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs, + // but compilation fails when declaring that. +#elif defined(__arm__) + __asm__ __volatile__( + "push {r1-r12, lr}\n\t" // Save state, 13*4B = 52B + ".cfi_adjust_cfa_offset 52\n\t" + "push {r9}\n\t" + ".cfi_adjust_cfa_offset 4\n\t" + "mov r9, %[referrer]\n\n" + "str r9, [sp, #-8]!\n\t" // Push referrer, +8B padding so 16B aligned + ".cfi_adjust_cfa_offset 8\n\t" + "ldr r9, [sp, #8]\n\t" + + // Push everything on the stack, so we don't rely on the order. What a mess. 
:-( + "sub sp, sp, #24\n\t" + "str %[arg0], [sp]\n\t" + "str %[arg1], [sp, #4]\n\t" + "str %[arg2], [sp, #8]\n\t" + "str %[code], [sp, #12]\n\t" + "str %[self], [sp, #16]\n\t" + "str %[hidden], [sp, #20]\n\t" + "ldr r0, [sp]\n\t" + "ldr r1, [sp, #4]\n\t" + "ldr r2, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "ldr r9, [sp, #16]\n\t" + "ldr r12, [sp, #20]\n\t" + "add sp, sp, #24\n\t" + + "blx r3\n\t" // Call the stub + "add sp, sp, #12\n\t" // Pop nullptr and padding + ".cfi_adjust_cfa_offset -12\n\t" + "pop {r1-r12, lr}\n\t" // Restore state + ".cfi_adjust_cfa_offset -52\n\t" + "mov %[result], r0\n\t" // Save the result + : [result] "=r" (result) + // Use the result from r0 + : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self), + [referrer] "r"(referrer), [hidden] "r"(hidden) + : ); // clobber. +#elif defined(__aarch64__) + __asm__ __volatile__( + // Spill space for d8 - d15 + "sub sp, sp, #64\n\t" + ".cfi_adjust_cfa_offset 64\n\t" + "stp d8, d9, [sp]\n\t" + "stp d10, d11, [sp, #16]\n\t" + "stp d12, d13, [sp, #32]\n\t" + "stp d14, d15, [sp, #48]\n\t" + + "sub sp, sp, #48\n\t" // Reserve stack space, 16B aligned + ".cfi_adjust_cfa_offset 48\n\t" + "stp %[referrer], x1, [sp]\n\t"// referrer, x1 + "stp x2, x3, [sp, #16]\n\t" // Save x2, x3 + "stp x18, x30, [sp, #32]\n\t" // Save x18(xSELF), xLR + + // Push everything on the stack, so we don't rely on the order. What a mess. :-( + "sub sp, sp, #48\n\t" + ".cfi_adjust_cfa_offset 48\n\t" + "str %[arg0], [sp]\n\t" + "str %[arg1], [sp, #8]\n\t" + "str %[arg2], [sp, #16]\n\t" + "str %[code], [sp, #24]\n\t" + "str %[self], [sp, #32]\n\t" + "str %[hidden], [sp, #40]\n\t" + + // Now we definitely have x0-x3 free, use it to garble d8 - d15 + "movk x0, #0xfad0\n\t" + "movk x0, #0xebad, lsl #16\n\t" + "movk x0, #0xfad0, lsl #32\n\t" + "movk x0, #0xebad, lsl #48\n\t" + "fmov d8, x0\n\t" + "add x0, x0, 1\n\t" + "fmov d9, x0\n\t" + "add x0, x0, 1\n\t" + "fmov d10, x0\n\t" + "add x0, x0, 1\n\t" + "fmov d11, x0\n\t" + "add x0, x0, 1\n\t" + "fmov d12, x0\n\t" + "add x0, x0, 1\n\t" + "fmov d13, x0\n\t" + "add x0, x0, 1\n\t" + "fmov d14, x0\n\t" + "add x0, x0, 1\n\t" + "fmov d15, x0\n\t" + + // Load call params + "ldr x0, [sp]\n\t" + "ldr x1, [sp, #8]\n\t" + "ldr x2, [sp, #16]\n\t" + "ldr x3, [sp, #24]\n\t" + "ldr x18, [sp, #32]\n\t" + "ldr x12, [sp, #40]\n\t" + "add sp, sp, #48\n\t" + ".cfi_adjust_cfa_offset -48\n\t" + + + "blr x3\n\t" // Call the stub + + // Test d8 - d15. We can use x1 and x2. + "movk x1, #0xfad0\n\t" + "movk x1, #0xebad, lsl #16\n\t" + "movk x1, #0xfad0, lsl #32\n\t" + "movk x1, #0xebad, lsl #48\n\t" + "fmov x2, d8\n\t" + "cmp x1, x2\n\t" + "b.ne 1f\n\t" + "add x1, x1, 1\n\t" + + "fmov x2, d9\n\t" + "cmp x1, x2\n\t" + "b.ne 1f\n\t" + "add x1, x1, 1\n\t" + + "fmov x2, d10\n\t" + "cmp x1, x2\n\t" + "b.ne 1f\n\t" + "add x1, x1, 1\n\t" + + "fmov x2, d11\n\t" + "cmp x1, x2\n\t" + "b.ne 1f\n\t" + "add x1, x1, 1\n\t" + + "fmov x2, d12\n\t" + "cmp x1, x2\n\t" + "b.ne 1f\n\t" + "add x1, x1, 1\n\t" + + "fmov x2, d13\n\t" + "cmp x1, x2\n\t" + "b.ne 1f\n\t" + "add x1, x1, 1\n\t" + + "fmov x2, d14\n\t" + "cmp x1, x2\n\t" + "b.ne 1f\n\t" + "add x1, x1, 1\n\t" + + "fmov x2, d15\n\t" + "cmp x1, x2\n\t" + "b.ne 1f\n\t" + + "mov %[fpr_result], #0\n\t" + + // Finish up. 
+ "2:\n\t" + "ldp x1, x2, [sp, #8]\n\t" // Restore x1, x2 + "ldp x3, x18, [sp, #24]\n\t" // Restore x3, xSELF + "ldr x30, [sp, #40]\n\t" // Restore xLR + "add sp, sp, #48\n\t" // Free stack space + ".cfi_adjust_cfa_offset -48\n\t" + "mov %[result], x0\n\t" // Save the result + + "ldp d8, d9, [sp]\n\t" // Restore d8 - d15 + "ldp d10, d11, [sp, #16]\n\t" + "ldp d12, d13, [sp, #32]\n\t" + "ldp d14, d15, [sp, #48]\n\t" + "add sp, sp, #64\n\t" + ".cfi_adjust_cfa_offset -64\n\t" + + "b 3f\n\t" // Goto end + + // Failed fpr verification. + "1:\n\t" + "mov %[fpr_result], #1\n\t" + "b 2b\n\t" // Goto finish-up + + // End + "3:\n\t" + : [result] "=r" (result), [fpr_result] "=r" (fpr_result) + // Use the result from r0 + : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self), + [referrer] "r"(referrer), [hidden] "r"(hidden) + : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17"); // clobber. +#elif defined(__x86_64__) + // Note: Uses the native convention + // TODO: Set the thread? + __asm__ __volatile__( + "movq %[hidden], %%r9\n\t" // No need to save r9, listed as clobbered + "movd %%r9, %%xmm0\n\t" + "pushq %[referrer]\n\t" // Push referrer + "pushq (%%rsp)\n\t" // & 16B alignment padding + ".cfi_adjust_cfa_offset 16\n\t" + "call *%%rax\n\t" // Call the stub + "addq $16, %%rsp\n\t" // Pop nullptr and padding + ".cfi_adjust_cfa_offset -16\n\t" + : "=a" (result) + // Use the result from rax + : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer), [hidden] "m"(hidden) + // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax + : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); // clobber all + // TODO: Should we clobber the other registers? +#else + LOG(WARNING) << "Was asked to invoke for an architecture I do not understand."; + result = 0; +#endif + // Pop transition. + self->PopManagedStackFragment(fragment); + + fp_result = fpr_result; + EXPECT_EQ(0U, fp_result); + + return result; + } + // Method with 32b arg0, 64b arg1 size_t Invoke3UWithReferrer(size_t arg0, uint64_t arg1, uintptr_t code, Thread* self, mirror::ArtMethod* referrer) { @@ -1448,4 +1679,116 @@ TEST_F(StubTest, Fields64) { TestFields(self, this, Primitive::Type::kPrimLong); } +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__) +extern "C" void art_quick_imt_conflict_trampoline(void); +#endif + +TEST_F(StubTest, IMT) { +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__) + TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING(); + + Thread* self = Thread::Current(); + + ScopedObjectAccess soa(self); + StackHandleScope<7> hs(self); + + JNIEnv* env = Thread::Current()->GetJniEnv(); + + // ArrayList + + // Load ArrayList and used methods (JNI). + jclass arraylist_jclass = env->FindClass("java/util/ArrayList"); + ASSERT_NE(nullptr, arraylist_jclass); + jmethodID arraylist_constructor = env->GetMethodID(arraylist_jclass, "<init>", "()V"); + ASSERT_NE(nullptr, arraylist_constructor); + jmethodID contains_jmethod = env->GetMethodID(arraylist_jclass, "contains", "(Ljava/lang/Object;)Z"); + ASSERT_NE(nullptr, contains_jmethod); + jmethodID add_jmethod = env->GetMethodID(arraylist_jclass, "add", "(Ljava/lang/Object;)Z"); + ASSERT_NE(nullptr, add_jmethod); + + // Get mirror representation. + Handle<mirror::ArtMethod> contains_amethod(hs.NewHandle(soa.DecodeMethod(contains_jmethod))); + + // Patch up ArrayList.contains. 
+ if (contains_amethod.Get()->GetEntryPointFromQuickCompiledCode() == nullptr) { + contains_amethod.Get()->SetEntryPointFromQuickCompiledCode(reinterpret_cast<void*>( + GetTlsPtr(self)->quick_entrypoints.pQuickToInterpreterBridge)); + } + + // List + + // Load List and used methods (JNI). + jclass list_jclass = env->FindClass("java/util/List"); + ASSERT_NE(nullptr, list_jclass); + jmethodID inf_contains_jmethod = env->GetMethodID(list_jclass, "contains", "(Ljava/lang/Object;)Z"); + ASSERT_NE(nullptr, inf_contains_jmethod); + + // Get mirror representation. + Handle<mirror::ArtMethod> inf_contains(hs.NewHandle(soa.DecodeMethod(inf_contains_jmethod))); + + // Object + + jclass obj_jclass = env->FindClass("java/lang/Object"); + ASSERT_NE(nullptr, obj_jclass); + jmethodID obj_constructor = env->GetMethodID(obj_jclass, "<init>", "()V"); + ASSERT_NE(nullptr, obj_constructor); + + // Sanity check: check that there is a conflict for List.contains in ArrayList. + + mirror::Class* arraylist_class = soa.Decode<mirror::Class*>(arraylist_jclass); + mirror::ArtMethod* m = arraylist_class->GetImTable()->Get( + inf_contains->GetDexMethodIndex() % ClassLinker::kImtSize); + + if (!m->IsImtConflictMethod()) { + LOG(WARNING) << "Test is meaningless, no IMT conflict in setup: " << + PrettyMethod(m, true); + LOG(WARNING) << "Please update StubTest.IMT."; + return; + } + + // Create instances. + + jobject jarray_list = env->NewObject(arraylist_jclass, arraylist_constructor); + ASSERT_NE(nullptr, jarray_list); + Handle<mirror::Object> array_list(hs.NewHandle(soa.Decode<mirror::Object*>(jarray_list))); + + jobject jobj = env->NewObject(obj_jclass, obj_constructor); + ASSERT_NE(nullptr, jobj); + Handle<mirror::Object> obj(hs.NewHandle(soa.Decode<mirror::Object*>(jobj))); + + // Invoke. + + size_t result = + Invoke3WithReferrerAndHidden(0U, reinterpret_cast<size_t>(array_list.Get()), + reinterpret_cast<size_t>(obj.Get()), + reinterpret_cast<uintptr_t>(&art_quick_imt_conflict_trampoline), + self, contains_amethod.Get(), + static_cast<size_t>(inf_contains.Get()->GetDexMethodIndex())); + + ASSERT_FALSE(self->IsExceptionPending()); + EXPECT_EQ(static_cast<size_t>(JNI_FALSE), result); + + // Add object. + + env->CallBooleanMethod(jarray_list, add_jmethod, jobj); + + ASSERT_FALSE(self->IsExceptionPending()) << PrettyTypeOf(self->GetException(nullptr)); + + // Invoke again. + + result = Invoke3WithReferrerAndHidden(0U, reinterpret_cast<size_t>(array_list.Get()), + reinterpret_cast<size_t>(obj.Get()), + reinterpret_cast<uintptr_t>(&art_quick_imt_conflict_trampoline), + self, contains_amethod.Get(), + static_cast<size_t>(inf_contains.Get()->GetDexMethodIndex())); + + ASSERT_FALSE(self->IsExceptionPending()); + EXPECT_EQ(static_cast<size_t>(JNI_TRUE), result); +#else + LOG(INFO) << "Skipping memcpy as I don't know how to do that on " << kRuntimeISA; + // Force-print to std::cout so it's also outside the logcat. + std::cout << "Skipping memcpy as I don't know how to do that on " << kRuntimeISA << std::endl; +#endif +} + } // namespace art diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S index 909bd3ece7..f1d07464e3 100644 --- a/runtime/arch/x86/asm_support_x86.S +++ b/runtime/arch/x86/asm_support_x86.S @@ -19,7 +19,7 @@ #include "asm_support_x86.h" -#if defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5) +#if defined(__APPLE__) || (defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)) // Clang's as(1) doesn't let you name macro parameters prior to 3.5. 
#define MACRO0(macro_name) .macro macro_name #define MACRO1(macro_name, macro_arg1) .macro macro_name @@ -32,8 +32,6 @@ #define PLT_VAR(name, index) SYMBOL($index) #define REG_VAR(name,index) %$index #define CALL_MACRO(name,index) $index - #define FUNCTION_TYPE(name,index) .type $index, @function - #define SIZE(name,index) .size $index, .-$index // The use of $x for arguments mean that literals need to be represented with $$x in macros. #define LITERAL(value) $value @@ -56,13 +54,22 @@ #define PLT_VAR(name, index) name&@PLT #define REG_VAR(name,index) %name #define CALL_MACRO(name,index) name& - #define FUNCTION_TYPE(name,index) .type name&, @function - #define SIZE(name,index) .size name, .-name #define LITERAL(value) $value #define MACRO_LITERAL(value) $value #endif +#if defined(__APPLE__) + #define FUNCTION_TYPE(name,index) + #define SIZE(name,index) +#elif defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5) + #define FUNCTION_TYPE(name,index) .type $index, @function + #define SIZE(name,index) .size $index, .-$index +#else + #define FUNCTION_TYPE(name,index) .type name&, @function + #define SIZE(name,index) .size name, .-name +#endif + // CFI support. #if !defined(__APPLE__) #define CFI_STARTPROC .cfi_startproc diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 9c86c75ddd..ed7f246466 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -239,24 +239,45 @@ TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromC /* * All generated callsites for interface invokes and invocation slow paths will load arguments - * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain + * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain * the method_idx. This wrapper will save arg1-arg3, load the caller's Method*, align the * stack and call the appropriate C helper. - * NOTE: "this" is first visible argument of the target, and so can be found in arg1/r1. + * NOTE: "this" is first visible argument of the target, and so can be found in arg1/rsi. * - * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting - * of the target Method* in r0 and method->code_ in r1. + * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting + * of the target Method* in rax and method->code_ in rdx. * - * If unsuccessful, the helper will return NULL/NULL. There will bea pending exception in the + * If unsuccessful, the helper will return NULL/????. There will be a pending exception in the * thread and we branch to another stub to deliver it. * - * On success this wrapper will restore arguments and *jump* to the target, leaving the lr - * pointing back to the original caller. + * On success this wrapper will restore arguments and *jump* to the target, leaving the return + * location on the stack. + * + * Adapted from x86 code. 
*/ MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name) DEFINE_FUNCTION VAR(c_name, 0) - int3 - int3 + SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME // save callee saves in case allocation triggers GC + // Helper signature is always + // (method_idx, *this_object, *caller_method, *self, sp) + + movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE(%rsp), %rdx // pass caller Method* + movq %gs:THREAD_SELF_OFFSET, %rcx // pass Thread + movq %rsp, %r8 // pass SP + + call PLT_VAR(cxx_name, 1) // cxx_name(arg1, arg2, caller method*, Thread*, SP) + // save the code pointer + movq %rax, %rdi + movq %rdx, %rax + RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME + + testq %rdi, %rdi + jz 1f + + // Tail call to intended method. + jmp *%rax +1: + DELIVER_PENDING_EXCEPTION END_FUNCTION VAR(c_name, 0) END_MACRO @@ -977,9 +998,18 @@ DEFINE_FUNCTION art_quick_proxy_invoke_handler END_FUNCTION art_quick_proxy_invoke_handler /* - * Called to resolve an imt conflict. + * Called to resolve an imt conflict. Clobbers %rax (which will be clobbered later anyways). + * + * xmm0 is a hidden argument that holds the target method's dex method index. + * TODO: With proper hard-float support, this needs to be kept in sync with the quick compiler. */ -UNIMPLEMENTED art_quick_imt_conflict_trampoline +DEFINE_FUNCTION art_quick_imt_conflict_trampoline + movq 16(%rsp), %rdi // load caller Method* + movl METHOD_DEX_CACHE_METHODS_OFFSET(%rdi), %edi // load dex_cache_resolved_methods + movd %xmm0, %rax // get target method index stored in xmm0 + movl OBJECT_ARRAY_DATA_OFFSET(%rdi, %rax, 4), %edi // load the target method + jmp art_quick_invoke_interface_trampoline_local +END_FUNCTION art_quick_imt_conflict_trampoline DEFINE_FUNCTION art_quick_resolution_trampoline SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME diff --git a/runtime/atomic.h b/runtime/atomic.h index 1f975dc0ce..9262db6724 100644 --- a/runtime/atomic.h +++ b/runtime/atomic.h @@ -17,7 +17,15 @@ #ifndef ART_RUNTIME_ATOMIC_H_ #define ART_RUNTIME_ATOMIC_H_ +#ifdef __clang__ +#define ART_HAVE_STDATOMIC 1 +#endif + #include <stdint.h> +#if ART_HAVE_STDATOMIC +#include <atomic> +#endif +#include <limits> #include <vector> #include "base/logging.h" @@ -27,6 +35,76 @@ namespace art { class Mutex; +#if ART_HAVE_STDATOMIC +template<typename T> +class Atomic : public std::atomic<T> { + public: + COMPILE_ASSERT(sizeof(T) == sizeof(std::atomic<T>), + std_atomic_size_differs_from_that_of_underlying_type); + COMPILE_ASSERT(alignof(T) == alignof(std::atomic<T>), + std_atomic_alignment_differs_from_that_of_underlying_type); + + Atomic<T>() : std::atomic<T>() { } + + explicit Atomic<T>(T value) : std::atomic<T>(value) { } + + // Load from memory without ordering or synchronization constraints. + T LoadRelaxed() const { + return this->load(std::memory_order_relaxed); + } + + // Load from memory with a total ordering. + T LoadSequentiallyConsistent() const { + return this->load(std::memory_order_seq_cst); + } + + // Store to memory without ordering or synchronization constraints. + void StoreRelaxed(T desired) { + this->store(desired, std::memory_order_relaxed); + } + + // Store to memory with a total ordering. + void StoreSequentiallyConsistent(T desired) { + this->store(desired, std::memory_order_seq_cst); + } + + // Atomically replace the value with desired value if it matches the expected value. Doesn't + // imply ordering or synchronization constraints. 
+ bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) { + return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed); + } + + // Atomically replace the value with desired value if it matches the expected value. Prior writes + // made to other memory locations by the thread that did the release become visible in this + // thread. + bool CompareExchangeWeakAcquire(T expected_value, T desired_value) { + return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_acquire); + } + + // Atomically replace the value with desired value if it matches the expected value. prior writes + // to other memory locations become visible to the threads that do a consume or an acquire on the + // same location. + bool CompareExchangeWeakRelease(T expected_value, T desired_value) { + return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_release); + } + + T FetchAndAddSequentiallyConsistent(const T value) { + return this->fetch_add(value, std::memory_order_seq_cst); // Return old_value. + } + + T FetchAndSubSequentiallyConsistent(const T value) { + return this->fetch_sub(value, std::memory_order_seq_cst); // Return old value. + } + + volatile T* Address() { + return reinterpret_cast<T*>(this); + } + + static T MaxValue() { + return std::numeric_limits<T>::max(); + } +}; +#else template<typename T> class Atomic { public: @@ -34,24 +112,54 @@ class Atomic { explicit Atomic<T>(T value) : value_(value) { } - Atomic<T>& operator=(T desired) { - Store(desired); - return *this; + // Load from memory without ordering or synchronization constraints. + T LoadRelaxed() const { + return value_; } - T Load() const { - return value_; + // Load from memory with a total ordering. + T LoadSequentiallyConsistent() const; + + // Store to memory without ordering or synchronization constraints. + void StoreRelaxed(T desired) { + value_ = desired; + } + + // Store to memory with a total ordering. + void StoreSequentiallyConsistent(T desired); + + // Atomically replace the value with desired value if it matches the expected value. Doesn't + // imply ordering or synchronization constraints. + bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) { + // TODO: make this relaxed. + return __sync_bool_compare_and_swap(&value_, expected_value, desired_value); } - operator T() const { - return Load(); + // Atomically replace the value with desired value if it matches the expected value. Prior writes + // made to other memory locations by the thread that did the release become visible in this + // thread. + bool CompareExchangeWeakAcquire(T expected_value, T desired_value) { + // TODO: make this acquire. + return __sync_bool_compare_and_swap(&value_, expected_value, desired_value); + } + + // Atomically replace the value with desired value if it matches the expected value. prior writes + // to other memory locations become visible to the threads that do a consume or an acquire on the + // same location. + bool CompareExchangeWeakRelease(T expected_value, T desired_value) { + // TODO: make this release. + return __sync_bool_compare_and_swap(&value_, expected_value, desired_value); + } + + volatile T* Address() { + return &value_; } - T FetchAndAdd(const T value) { + T FetchAndAddSequentiallyConsistent(const T value) { return __sync_fetch_and_add(&value_, value); // Return old_value. 
} - T FetchAndSub(const T value) { + T FetchAndSubSequentiallyConsistent(const T value) { return __sync_fetch_and_sub(&value_, value); // Return old value. } @@ -71,22 +179,14 @@ class Atomic { return __sync_fetch_and_sub(&value_, 1); // Return old value. } - bool CompareAndSwap(T expected_value, T desired_value) { - return __sync_bool_compare_and_swap(&value_, expected_value, desired_value); - } - - volatile T* Address() { - return &value_; + static T MaxValue() { + return std::numeric_limits<T>::max(); } private: - // Unsafe = operator for non atomic operations on the integer. - void Store(T desired) { - value_ = desired; - } - - volatile T value_; + T value_; }; +#endif typedef Atomic<int32_t> AtomicInteger; @@ -260,6 +360,23 @@ class QuasiAtomic { DISALLOW_COPY_AND_ASSIGN(QuasiAtomic); }; +#if !ART_HAVE_STDATOMIC +template<typename T> +inline T Atomic<T>::LoadSequentiallyConsistent() const { + T result = value_; + QuasiAtomic::MembarLoadLoad(); + return result; +} + +template<typename T> +inline void Atomic<T>::StoreSequentiallyConsistent(T desired) { + QuasiAtomic::MembarStoreStore(); + value_ = desired; + QuasiAtomic::MembarStoreLoad(); +} + +#endif + } // namespace art #endif // ART_RUNTIME_ATOMIC_H_ diff --git a/runtime/barrier_test.cc b/runtime/barrier_test.cc index 331d0c0175..086ef440a3 100644 --- a/runtime/barrier_test.cc +++ b/runtime/barrier_test.cc @@ -77,20 +77,20 @@ TEST_F(BarrierTest, CheckWait) { barrier.Increment(self, num_threads); // At this point each thread should have passed through the barrier. The first count should be // equal to num_threads. - EXPECT_EQ(num_threads, count1); + EXPECT_EQ(num_threads, count1.LoadRelaxed()); // Count 3 should still be zero since no thread should have gone past the second barrier. - EXPECT_EQ(0, count3); + EXPECT_EQ(0, count3.LoadRelaxed()); // Now lets tell the threads to pass again. barrier.Increment(self, num_threads); // Count 2 should be equal to num_threads since each thread must have passed the second barrier // at this point. - EXPECT_EQ(num_threads, count2); + EXPECT_EQ(num_threads, count2.LoadRelaxed()); // Wait for all the threads to finish. thread_pool.Wait(self, true, false); // All three counts should be equal to num_threads now. - EXPECT_EQ(count1, count2); - EXPECT_EQ(count2, count3); - EXPECT_EQ(num_threads, count3); + EXPECT_EQ(count1.LoadRelaxed(), count2.LoadRelaxed()); + EXPECT_EQ(count2.LoadRelaxed(), count3.LoadRelaxed()); + EXPECT_EQ(num_threads, count3.LoadRelaxed()); } class CheckPassTask : public Task { @@ -133,7 +133,7 @@ TEST_F(BarrierTest, CheckPass) { // Wait for all the tasks to complete using the barrier. barrier.Increment(self, expected_total_tasks); // The total number of completed tasks should be equal to expected_total_tasks. - EXPECT_EQ(count, expected_total_tasks); + EXPECT_EQ(count.LoadRelaxed(), expected_total_tasks); } } // namespace art diff --git a/runtime/base/macros.h b/runtime/base/macros.h index 81755144b9..47571f85bf 100644 --- a/runtime/base/macros.h +++ b/runtime/base/macros.h @@ -169,7 +169,7 @@ char (&ArraySizeHelper(T (&array)[N]))[N]; // bionic and glibc both have TEMP_FAILURE_RETRY, but Mac OS' libc doesn't. 
#ifndef TEMP_FAILURE_RETRY #define TEMP_FAILURE_RETRY(exp) ({ \ - typeof(exp) _rc; \ + decltype(exp) _rc; \ do { \ _rc = (exp); \ } while (_rc == -1 && errno == EINTR); \ diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h index a7e25cb907..adf4c66aa4 100644 --- a/runtime/base/mutex-inl.h +++ b/runtime/base/mutex-inl.h @@ -221,7 +221,7 @@ inline void ReaderWriterMutex::SharedUnlock(Thread* self) { // Reduce state by 1. done = android_atomic_release_cas(cur_state, cur_state - 1, &state_) == 0; if (done && (cur_state - 1) == 0) { // cas may fail due to noise? - if (num_pending_writers_ > 0 || num_pending_readers_ > 0) { + if (num_pending_writers_.LoadRelaxed() > 0 || num_pending_readers_ > 0) { // Wake any exclusive waiters as there are now no readers. futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0); } diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc index 2bc17bf403..6f7f2c1e99 100644 --- a/runtime/base/mutex.cc +++ b/runtime/base/mutex.cc @@ -71,12 +71,12 @@ static bool ComputeRelativeTimeSpec(timespec* result_ts, const timespec& lhs, co class ScopedAllMutexesLock { public: explicit ScopedAllMutexesLock(const BaseMutex* mutex) : mutex_(mutex) { - while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(0, mutex)) { + while (!gAllMutexData->all_mutexes_guard.CompareExchangeWeakAcquire(0, mutex)) { NanoSleep(100); } } ~ScopedAllMutexesLock() { - while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(mutex_, 0)) { + while (!gAllMutexData->all_mutexes_guard.CompareExchangeWeakRelease(mutex_, 0)) { NanoSleep(100); } } @@ -174,34 +174,34 @@ void BaseMutex::RecordContention(uint64_t blocked_tid, uint64_t owner_tid, uint64_t nano_time_blocked) { if (kLogLockContentions) { - ContentionLogData* data = contetion_log_data_; + ContentionLogData* data = contention_log_data_; ++(data->contention_count); data->AddToWaitTime(nano_time_blocked); ContentionLogEntry* log = data->contention_log; // This code is intentionally racy as it is only used for diagnostics. 
- uint32_t slot = data->cur_content_log_entry; + uint32_t slot = data->cur_content_log_entry.LoadRelaxed(); if (log[slot].blocked_tid == blocked_tid && log[slot].owner_tid == blocked_tid) { ++log[slot].count; } else { uint32_t new_slot; do { - slot = data->cur_content_log_entry; + slot = data->cur_content_log_entry.LoadRelaxed(); new_slot = (slot + 1) % kContentionLogSize; - } while (!data->cur_content_log_entry.CompareAndSwap(slot, new_slot)); + } while (!data->cur_content_log_entry.CompareExchangeWeakRelaxed(slot, new_slot)); log[new_slot].blocked_tid = blocked_tid; log[new_slot].owner_tid = owner_tid; - log[new_slot].count = 1; + log[new_slot].count.StoreRelaxed(1); } } } void BaseMutex::DumpContention(std::ostream& os) const { if (kLogLockContentions) { - const ContentionLogData* data = contetion_log_data_; + const ContentionLogData* data = contention_log_data_; const ContentionLogEntry* log = data->contention_log; uint64_t wait_time = data->wait_time; - uint32_t contention_count = data->contention_count; + uint32_t contention_count = data->contention_count.LoadRelaxed(); if (contention_count == 0) { os << "never contended"; } else { @@ -213,7 +213,7 @@ void BaseMutex::DumpContention(std::ostream& os) const { for (size_t i = 0; i < kContentionLogSize; ++i) { uint64_t blocked_tid = log[i].blocked_tid; uint64_t owner_tid = log[i].owner_tid; - uint32_t count = log[i].count; + uint32_t count = log[i].count.LoadRelaxed(); if (count > 0) { auto it = most_common_blocked.find(blocked_tid); if (it != most_common_blocked.end()) { @@ -261,7 +261,7 @@ Mutex::Mutex(const char* name, LockLevel level, bool recursive) #if ART_USE_FUTEXES state_ = 0; exclusive_owner_ = 0; - num_contenders_ = 0; + DCHECK_EQ(0, num_contenders_.LoadRelaxed()); #elif defined(__BIONIC__) || defined(__APPLE__) // Use recursive mutexes for bionic and Apple otherwise the // non-recursive mutexes don't have TIDs to check lock ownership of. @@ -283,7 +283,8 @@ Mutex::~Mutex() { LOG(shutting_down ? WARNING : FATAL) << "destroying mutex with owner: " << exclusive_owner_; } else { CHECK_EQ(exclusive_owner_, 0U) << "unexpectedly found an owner on unlocked mutex " << name_; - CHECK_EQ(num_contenders_, 0) << "unexpectedly found a contender on mutex " << name_; + CHECK_EQ(num_contenders_.LoadRelaxed(), 0) + << "unexpectedly found a contender on mutex " << name_; } #else // We can't use CHECK_MUTEX_CALL here because on shutdown a suspended daemon thread @@ -406,7 +407,7 @@ void Mutex::ExclusiveUnlock(Thread* self) { done = __sync_bool_compare_and_swap(&state_, cur_state, 0 /* new state */); if (LIKELY(done)) { // Spurious fail? // Wake a contender - if (UNLIKELY(num_contenders_ > 0)) { + if (UNLIKELY(num_contenders_.LoadRelaxed() > 0)) { futex(&state_, FUTEX_WAKE, 1, NULL, NULL, 0); } } @@ -459,7 +460,7 @@ ReaderWriterMutex::~ReaderWriterMutex() { CHECK_EQ(state_, 0); CHECK_EQ(exclusive_owner_, 0U); CHECK_EQ(num_pending_readers_, 0); - CHECK_EQ(num_pending_writers_, 0); + CHECK_EQ(num_pending_writers_.LoadRelaxed(), 0); #else // We can't use CHECK_MUTEX_CALL here because on shutdown a suspended daemon thread // may still be using locks. @@ -523,7 +524,7 @@ void ReaderWriterMutex::ExclusiveUnlock(Thread* self) { done = __sync_bool_compare_and_swap(&state_, -1 /* cur_state*/, 0 /* new state */); if (LIKELY(done)) { // cmpxchg may fail due to noise? // Wake any waiters. 
- if (UNLIKELY(num_pending_readers_ > 0 || num_pending_writers_ > 0)) { + if (UNLIKELY(num_pending_readers_ > 0 || num_pending_writers_.LoadRelaxed() > 0)) { futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0); } } @@ -646,7 +647,7 @@ std::ostream& operator<<(std::ostream& os, const ReaderWriterMutex& mu) { ConditionVariable::ConditionVariable(const char* name, Mutex& guard) : name_(name), guard_(guard) { #if ART_USE_FUTEXES - sequence_ = 0; + DCHECK_EQ(0, sequence_.LoadRelaxed()); num_waiters_ = 0; #else pthread_condattr_t cond_attrs; @@ -691,7 +692,7 @@ void ConditionVariable::Broadcast(Thread* self) { sequence_++; // Indicate the broadcast occurred. bool done = false; do { - int32_t cur_sequence = sequence_; + int32_t cur_sequence = sequence_.LoadRelaxed(); // Requeue waiters onto mutex. The waiter holds the contender count on the mutex high ensuring // mutex unlocks will awaken the requeued waiter thread. done = futex(sequence_.Address(), FUTEX_CMP_REQUEUE, 0, @@ -740,7 +741,7 @@ void ConditionVariable::WaitHoldingLocks(Thread* self) { // Ensure the Mutex is contended so that requeued threads are awoken. guard_.num_contenders_++; guard_.recursion_count_ = 1; - int32_t cur_sequence = sequence_; + int32_t cur_sequence = sequence_.LoadRelaxed(); guard_.ExclusiveUnlock(self); if (futex(sequence_.Address(), FUTEX_WAIT, cur_sequence, NULL, NULL, 0) != 0) { // Futex failed, check it is an expected error. @@ -754,7 +755,7 @@ void ConditionVariable::WaitHoldingLocks(Thread* self) { CHECK_GE(num_waiters_, 0); num_waiters_--; // We awoke and so no longer require awakes from the guard_'s unlock. - CHECK_GE(guard_.num_contenders_, 0); + CHECK_GE(guard_.num_contenders_.LoadRelaxed(), 0); guard_.num_contenders_--; #else guard_.recursion_count_ = 0; @@ -775,7 +776,7 @@ void ConditionVariable::TimedWait(Thread* self, int64_t ms, int32_t ns) { // Ensure the Mutex is contended so that requeued threads are awoken. guard_.num_contenders_++; guard_.recursion_count_ = 1; - int32_t cur_sequence = sequence_; + int32_t cur_sequence = sequence_.LoadRelaxed(); guard_.ExclusiveUnlock(self); if (futex(sequence_.Address(), FUTEX_WAIT, cur_sequence, &rel_ts, NULL, 0) != 0) { if (errno == ETIMEDOUT) { @@ -790,7 +791,7 @@ void ConditionVariable::TimedWait(Thread* self, int64_t ms, int32_t ns) { CHECK_GE(num_waiters_, 0); num_waiters_--; // We awoke and so no longer require awakes from the guard_'s unlock. 
- CHECK_GE(guard_.num_contenders_, 0); + CHECK_GE(guard_.num_contenders_.LoadRelaxed(), 0); guard_.num_contenders_--; #else #if !defined(__APPLE__) diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index 3f35670d6c..e13c8d5d62 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -160,12 +160,12 @@ class BaseMutex { void AddToWaitTime(uint64_t value); ContentionLogData() : wait_time(0) {} }; - ContentionLogData contetion_log_data_[kContentionLogDataSize]; + ContentionLogData contention_log_data_[kContentionLogDataSize]; public: bool HasEverContended() const { if (kLogLockContentions) { - return contetion_log_data_->contention_count > 0; + return contention_log_data_->contention_count.LoadSequentiallyConsistent() > 0; } return false; } diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 4bd86db47f..ee276c1670 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -1602,15 +1602,72 @@ extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self, mirror::ArtMet } } +// The following definitions create return types for two word-sized entities that will be passed +// in registers so that memory operations for the interface trampolines can be avoided. The entities +// are the resolved method and the pointer to the code to be invoked. +// +// On x86, ARM32 and MIPS, this is given for a *scalar* 64bit value. The definition thus *must* be +// uint64_t or long long int. We use the upper 32b for code, and the lower 32b for the method. +// +// On x86_64 and ARM64, structs are decomposed for allocation, so we can create a structs of two +// size_t-sized values. +// +// We need two operations: +// +// 1) A flag value that signals failure. The assembly stubs expect the method part to be "0". +// GetFailureValue() will return a value that has method == 0. +// +// 2) A value that combines a code pointer and a method pointer. +// GetSuccessValue() constructs this. + +#if defined(__i386__) || defined(__arm__) || defined(__mips__) +typedef uint64_t MethodAndCode; + +// Encodes method_ptr==nullptr and code_ptr==nullptr +static constexpr MethodAndCode GetFailureValue() { + return 0; +} + +// Use the lower 32b for the method pointer and the upper 32b for the code pointer. +static MethodAndCode GetSuccessValue(const void* code, mirror::ArtMethod* method) { + uint32_t method_uint = reinterpret_cast<uint32_t>(method); + uint64_t code_uint = reinterpret_cast<uint32_t>(code); + return ((code_uint << 32) | method_uint); +} + +#elif defined(__x86_64__) || defined(__aarch64__) +struct MethodAndCode { + uintptr_t method; + uintptr_t code; +}; + +// Encodes method_ptr==nullptr. Leaves random value in code pointer. +static MethodAndCode GetFailureValue() { + MethodAndCode ret; + ret.method = 0; + return ret; +} + +// Write values into their respective members. 
+static MethodAndCode GetSuccessValue(const void* code, mirror::ArtMethod* method) { + MethodAndCode ret; + ret.method = reinterpret_cast<uintptr_t>(method); + ret.code = reinterpret_cast<uintptr_t>(code); + return ret; +} +#else +#error "Unsupported architecture" +#endif + template<InvokeType type, bool access_check> -static uint64_t artInvokeCommon(uint32_t method_idx, mirror::Object* this_object, - mirror::ArtMethod* caller_method, - Thread* self, mirror::ArtMethod** sp); +static MethodAndCode artInvokeCommon(uint32_t method_idx, mirror::Object* this_object, + mirror::ArtMethod* caller_method, + Thread* self, mirror::ArtMethod** sp); template<InvokeType type, bool access_check> -static uint64_t artInvokeCommon(uint32_t method_idx, mirror::Object* this_object, - mirror::ArtMethod* caller_method, - Thread* self, mirror::ArtMethod** sp) { +static MethodAndCode artInvokeCommon(uint32_t method_idx, mirror::Object* this_object, + mirror::ArtMethod* caller_method, + Thread* self, mirror::ArtMethod** sp) { mirror::ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method, access_check, type); if (UNLIKELY(method == nullptr)) { @@ -1630,7 +1687,7 @@ static uint64_t artInvokeCommon(uint32_t method_idx, mirror::Object* this_object if (UNLIKELY(method == NULL)) { CHECK(self->IsExceptionPending()); - return 0; // failure + return GetFailureValue(); // Failure. } } DCHECK(!self->IsExceptionPending()); @@ -1639,24 +1696,17 @@ static uint64_t artInvokeCommon(uint32_t method_idx, mirror::Object* this_object // When we return, the caller will branch to this address, so it had better not be 0! DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method) << " location: " << MethodHelper(method).GetDexFile().GetLocation(); -#ifdef __LP64__ - UNIMPLEMENTED(FATAL); - return 0; -#else - uint32_t method_uint = reinterpret_cast<uint32_t>(method); - uint64_t code_uint = reinterpret_cast<uint32_t>(code); - uint64_t result = ((code_uint << 32) | method_uint); - return result; -#endif + + return GetSuccessValue(code, method); } // Explicit artInvokeCommon template function declarations to please analysis tool. 
#define EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(type, access_check) \ template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) \ - uint64_t artInvokeCommon<type, access_check>(uint32_t method_idx, \ - mirror::Object* this_object, \ - mirror::ArtMethod* caller_method, \ - Thread* self, mirror::ArtMethod** sp) \ + MethodAndCode artInvokeCommon<type, access_check>(uint32_t method_idx, \ + mirror::Object* this_object, \ + mirror::ArtMethod* caller_method, \ + Thread* self, mirror::ArtMethod** sp) \ EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kVirtual, false); EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kVirtual, true); @@ -1672,57 +1722,57 @@ EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kSuper, true); // See comments in runtime_support_asm.S -extern "C" uint64_t artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx, - mirror::Object* this_object, - mirror::ArtMethod* caller_method, - Thread* self, - mirror::ArtMethod** sp) +extern "C" MethodAndCode artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx, + mirror::Object* this_object, + mirror::ArtMethod* caller_method, + Thread* self, + mirror::ArtMethod** sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return artInvokeCommon<kInterface, true>(method_idx, this_object, caller_method, self, sp); } -extern "C" uint64_t artInvokeDirectTrampolineWithAccessCheck(uint32_t method_idx, - mirror::Object* this_object, - mirror::ArtMethod* caller_method, - Thread* self, - mirror::ArtMethod** sp) +extern "C" MethodAndCode artInvokeDirectTrampolineWithAccessCheck(uint32_t method_idx, + mirror::Object* this_object, + mirror::ArtMethod* caller_method, + Thread* self, + mirror::ArtMethod** sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return artInvokeCommon<kDirect, true>(method_idx, this_object, caller_method, self, sp); } -extern "C" uint64_t artInvokeStaticTrampolineWithAccessCheck(uint32_t method_idx, - mirror::Object* this_object, - mirror::ArtMethod* caller_method, - Thread* self, - mirror::ArtMethod** sp) +extern "C" MethodAndCode artInvokeStaticTrampolineWithAccessCheck(uint32_t method_idx, + mirror::Object* this_object, + mirror::ArtMethod* caller_method, + Thread* self, + mirror::ArtMethod** sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return artInvokeCommon<kStatic, true>(method_idx, this_object, caller_method, self, sp); } -extern "C" uint64_t artInvokeSuperTrampolineWithAccessCheck(uint32_t method_idx, - mirror::Object* this_object, - mirror::ArtMethod* caller_method, - Thread* self, - mirror::ArtMethod** sp) +extern "C" MethodAndCode artInvokeSuperTrampolineWithAccessCheck(uint32_t method_idx, + mirror::Object* this_object, + mirror::ArtMethod* caller_method, + Thread* self, + mirror::ArtMethod** sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return artInvokeCommon<kSuper, true>(method_idx, this_object, caller_method, self, sp); } -extern "C" uint64_t artInvokeVirtualTrampolineWithAccessCheck(uint32_t method_idx, - mirror::Object* this_object, - mirror::ArtMethod* caller_method, - Thread* self, - mirror::ArtMethod** sp) +extern "C" MethodAndCode artInvokeVirtualTrampolineWithAccessCheck(uint32_t method_idx, + mirror::Object* this_object, + mirror::ArtMethod* caller_method, + Thread* self, + mirror::ArtMethod** sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return artInvokeCommon<kVirtual, true>(method_idx, this_object, caller_method, self, sp); } // Determine target of interface dispatch. This object is known non-null. 
-extern "C" uint64_t artInvokeInterfaceTrampoline(mirror::ArtMethod* interface_method, - mirror::Object* this_object, - mirror::ArtMethod* caller_method, - Thread* self, mirror::ArtMethod** sp) +extern "C" MethodAndCode artInvokeInterfaceTrampoline(mirror::ArtMethod* interface_method, + mirror::Object* this_object, + mirror::ArtMethod* caller_method, + Thread* self, mirror::ArtMethod** sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { mirror::ArtMethod* method; if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) { @@ -1731,7 +1781,7 @@ extern "C" uint64_t artInvokeInterfaceTrampoline(mirror::ArtMethod* interface_me FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs); ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(interface_method, this_object, caller_method); - return 0; // Failure. + return GetFailureValue(); // Failure. } } else { FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs); @@ -1828,7 +1878,7 @@ extern "C" uint64_t artInvokeInterfaceTrampoline(mirror::ArtMethod* interface_me if (UNLIKELY(method == nullptr)) { CHECK(self->IsExceptionPending()); - return 0; // Failure. + return GetFailureValue(); // Failure. } } const void* code = method->GetEntryPointFromQuickCompiledCode(); @@ -1836,15 +1886,8 @@ extern "C" uint64_t artInvokeInterfaceTrampoline(mirror::ArtMethod* interface_me // When we return, the caller will branch to this address, so it had better not be 0! DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method) << " location: " << MethodHelper(method).GetDexFile().GetLocation(); -#ifdef __LP64__ - UNIMPLEMENTED(FATAL); - return 0; -#else - uint32_t method_uint = reinterpret_cast<uint32_t>(method); - uint64_t code_uint = reinterpret_cast<uint32_t>(code); - uint64_t result = ((code_uint << 32) | method_uint); - return result; -#endif + + return GetSuccessValue(code, method); } } // namespace art diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h index f3ed8d32c0..979970c4e2 100644 --- a/runtime/gc/accounting/atomic_stack.h +++ b/runtime/gc/accounting/atomic_stack.h @@ -46,8 +46,8 @@ class AtomicStack { void Reset() { DCHECK(mem_map_.get() != NULL); DCHECK(begin_ != NULL); - front_index_ = 0; - back_index_ = 0; + front_index_.StoreRelaxed(0); + back_index_.StoreRelaxed(0); debug_is_sorted_ = true; int result = madvise(begin_, sizeof(T) * capacity_, MADV_DONTNEED); if (result == -1) { @@ -64,12 +64,12 @@ class AtomicStack { } int32_t index; do { - index = back_index_; + index = back_index_.LoadRelaxed(); if (UNLIKELY(static_cast<size_t>(index) >= capacity_)) { // Stack overflow. return false; } - } while (!back_index_.CompareAndSwap(index, index + 1)); + } while (!back_index_.CompareExchangeWeakRelaxed(index, index + 1)); begin_[index] = value; return true; } @@ -83,13 +83,13 @@ class AtomicStack { int32_t index; int32_t new_index; do { - index = back_index_; + index = back_index_.LoadRelaxed(); new_index = index + num_slots; if (UNLIKELY(static_cast<size_t>(new_index) >= capacity_)) { // Stack overflow. 
return false; } - } while (!back_index_.CompareAndSwap(index, new_index)); + } while (!back_index_.CompareExchangeWeakRelaxed(index, new_index)); *start_address = &begin_[index]; *end_address = &begin_[new_index]; if (kIsDebugBuild) { @@ -114,31 +114,31 @@ class AtomicStack { if (kIsDebugBuild) { debug_is_sorted_ = false; } - int32_t index = back_index_; + int32_t index = back_index_.LoadRelaxed(); DCHECK_LT(static_cast<size_t>(index), capacity_); - back_index_ = index + 1; + back_index_.StoreRelaxed(index + 1); begin_[index] = value; } T PopBack() { - DCHECK_GT(back_index_, front_index_); + DCHECK_GT(back_index_.LoadRelaxed(), front_index_.LoadRelaxed()); // Decrement the back index non atomically. - back_index_ = back_index_ - 1; - return begin_[back_index_]; + back_index_.StoreRelaxed(back_index_.LoadRelaxed() - 1); + return begin_[back_index_.LoadRelaxed()]; } // Take an item from the front of the stack. T PopFront() { - int32_t index = front_index_; - DCHECK_LT(index, back_index_.Load()); - front_index_ = front_index_ + 1; + int32_t index = front_index_.LoadRelaxed(); + DCHECK_LT(index, back_index_.LoadRelaxed()); + front_index_.StoreRelaxed(index + 1); return begin_[index]; } // Pop a number of elements. void PopBackCount(int32_t n) { DCHECK_GE(Size(), static_cast<size_t>(n)); - back_index_.FetchAndSub(n); + back_index_.FetchAndSubSequentiallyConsistent(n); } bool IsEmpty() const { @@ -146,16 +146,16 @@ class AtomicStack { } size_t Size() const { - DCHECK_LE(front_index_, back_index_); - return back_index_ - front_index_; + DCHECK_LE(front_index_.LoadRelaxed(), back_index_.LoadRelaxed()); + return back_index_.LoadRelaxed() - front_index_.LoadRelaxed(); } T* Begin() const { - return const_cast<T*>(begin_ + front_index_); + return const_cast<T*>(begin_ + front_index_.LoadRelaxed()); } T* End() const { - return const_cast<T*>(begin_ + back_index_); + return const_cast<T*>(begin_ + back_index_.LoadRelaxed()); } size_t Capacity() const { @@ -169,11 +169,11 @@ class AtomicStack { } void Sort() { - int32_t start_back_index = back_index_.Load(); - int32_t start_front_index = front_index_.Load(); + int32_t start_back_index = back_index_.LoadRelaxed(); + int32_t start_front_index = front_index_.LoadRelaxed(); std::sort(Begin(), End()); - CHECK_EQ(start_back_index, back_index_.Load()); - CHECK_EQ(start_front_index, front_index_.Load()); + CHECK_EQ(start_back_index, back_index_.LoadRelaxed()); + CHECK_EQ(start_front_index, front_index_.LoadRelaxed()); if (kIsDebugBuild) { debug_is_sorted_ = true; } diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index cc258f5a9a..43331c32b5 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -99,9 +99,10 @@ MarkSweep::MarkSweep(Heap* heap, bool is_concurrent, const std::string& name_pre : GarbageCollector(heap, name_prefix + (is_concurrent ? 
"concurrent mark sweep": "mark sweep")), + current_space_bitmap_(nullptr), mark_bitmap_(nullptr), mark_stack_(nullptr), gc_barrier_(new Barrier(0)), mark_stack_lock_("mark sweep mark stack lock", kMarkSweepMarkStackLock), - is_concurrent_(is_concurrent) { + is_concurrent_(is_concurrent), live_stack_freeze_size_(0) { } void MarkSweep::InitializePhase() { @@ -109,19 +110,19 @@ void MarkSweep::InitializePhase() { mark_stack_ = heap_->GetMarkStack(); DCHECK(mark_stack_ != nullptr); immune_region_.Reset(); - class_count_ = 0; - array_count_ = 0; - other_count_ = 0; - large_object_test_ = 0; - large_object_mark_ = 0; - overhead_time_ = 0; - work_chunks_created_ = 0; - work_chunks_deleted_ = 0; - reference_count_ = 0; - mark_null_count_ = 0; - mark_immune_count_ = 0; - mark_fastpath_count_ = 0; - mark_slowpath_count_ = 0; + class_count_.StoreRelaxed(0); + array_count_.StoreRelaxed(0); + other_count_.StoreRelaxed(0); + large_object_test_.StoreRelaxed(0); + large_object_mark_.StoreRelaxed(0); + overhead_time_ .StoreRelaxed(0); + work_chunks_created_.StoreRelaxed(0); + work_chunks_deleted_.StoreRelaxed(0); + reference_count_.StoreRelaxed(0); + mark_null_count_.StoreRelaxed(0); + mark_immune_count_.StoreRelaxed(0); + mark_fastpath_count_.StoreRelaxed(0); + mark_slowpath_count_.StoreRelaxed(0); { // TODO: I don't think we should need heap bitmap lock to Get the mark bitmap. ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); @@ -596,7 +597,7 @@ class MarkStackTask : public Task { if (kUseFinger) { android_memory_barrier(); if (reinterpret_cast<uintptr_t>(ref) >= - static_cast<uintptr_t>(mark_sweep_->atomic_finger_)) { + static_cast<uintptr_t>(mark_sweep_->atomic_finger_.LoadRelaxed())) { return; } } @@ -881,7 +882,7 @@ void MarkSweep::RecursiveMark() { // This function does not handle heap end increasing, so we must use the space end. uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin()); uintptr_t end = reinterpret_cast<uintptr_t>(space->End()); - atomic_finger_ = static_cast<int32_t>(0xFFFFFFFF); + atomic_finger_.StoreRelaxed(AtomicInteger::MaxValue()); // Create a few worker tasks. const size_t n = thread_count * 2; @@ -1214,7 +1215,9 @@ void MarkSweep::ProcessMarkStackParallel(size_t thread_count) { thread_pool->Wait(self, true, true); thread_pool->StopWorkers(self); mark_stack_->Reset(); - CHECK_EQ(work_chunks_created_, work_chunks_deleted_) << " some of the work chunks were leaked"; + CHECK_EQ(work_chunks_created_.LoadSequentiallyConsistent(), + work_chunks_deleted_.LoadSequentiallyConsistent()) + << " some of the work chunks were leaked"; } // Scan anything that's on the mark stack. 
@@ -1269,24 +1272,27 @@ inline bool MarkSweep::IsMarked(const Object* object) const void MarkSweep::FinishPhase() { TimingLogger::ScopedSplit split("FinishPhase", &timings_); if (kCountScannedTypes) { - VLOG(gc) << "MarkSweep scanned classes=" << class_count_ << " arrays=" << array_count_ - << " other=" << other_count_; + VLOG(gc) << "MarkSweep scanned classes=" << class_count_.LoadRelaxed() + << " arrays=" << array_count_.LoadRelaxed() << " other=" << other_count_.LoadRelaxed(); } if (kCountTasks) { - VLOG(gc) << "Total number of work chunks allocated: " << work_chunks_created_; + VLOG(gc) << "Total number of work chunks allocated: " << work_chunks_created_.LoadRelaxed(); } if (kMeasureOverhead) { - VLOG(gc) << "Overhead time " << PrettyDuration(overhead_time_); + VLOG(gc) << "Overhead time " << PrettyDuration(overhead_time_.LoadRelaxed()); } if (kProfileLargeObjects) { - VLOG(gc) << "Large objects tested " << large_object_test_ << " marked " << large_object_mark_; + VLOG(gc) << "Large objects tested " << large_object_test_.LoadRelaxed() + << " marked " << large_object_mark_.LoadRelaxed(); } if (kCountJavaLangRefs) { - VLOG(gc) << "References scanned " << reference_count_; + VLOG(gc) << "References scanned " << reference_count_.LoadRelaxed(); } if (kCountMarkedObjects) { - VLOG(gc) << "Marked: null=" << mark_null_count_ << " immune=" << mark_immune_count_ - << " fastpath=" << mark_fastpath_count_ << " slowpath=" << mark_slowpath_count_; + VLOG(gc) << "Marked: null=" << mark_null_count_.LoadRelaxed() + << " immune=" << mark_immune_count_.LoadRelaxed() + << " fastpath=" << mark_fastpath_count_.LoadRelaxed() + << " slowpath=" << mark_slowpath_count_.LoadRelaxed(); } CHECK(mark_stack_->IsEmpty()); // Ensure that the mark stack is empty. mark_stack_->Reset(); diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h index e9a3c3a42b..d73bf3f69f 100644 --- a/runtime/gc/collector/mark_sweep.h +++ b/runtime/gc/collector/mark_sweep.h @@ -305,14 +305,14 @@ class MarkSweep : public GarbageCollector { AtomicInteger mark_fastpath_count_; AtomicInteger mark_slowpath_count_; - // Verification. - size_t live_stack_freeze_size_; - std::unique_ptr<Barrier> gc_barrier_; Mutex mark_stack_lock_ ACQUIRED_AFTER(Locks::classlinker_classes_lock_); const bool is_concurrent_; + // Verification. + size_t live_stack_freeze_size_; + private: friend class AddIfReachesAllocSpaceVisitor; // Used by mod-union table. friend class CardScanTask; diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index 7cee5a094c..03b72b6870 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -96,7 +96,7 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas CHECK_LE(obj->SizeOf(), usable_size); } const size_t new_num_bytes_allocated = - static_cast<size_t>(num_bytes_allocated_.FetchAndAdd(bytes_allocated)) + bytes_allocated; + static_cast<size_t>(num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_allocated)) + bytes_allocated; // TODO: Deprecate. if (kInstrumented) { if (Runtime::Current()->HasStatsEnabled()) { @@ -264,7 +264,7 @@ inline Heap::AllocationTimer::~AllocationTimer() { // Only if the allocation succeeded, record the time. 
if (allocated_obj != nullptr) { uint64_t allocation_end_time = NanoTime() / kTimeAdjust; - heap_->total_allocation_time_.FetchAndAdd(allocation_end_time - allocation_start_time_); + heap_->total_allocation_time_.FetchAndAddSequentiallyConsistent(allocation_end_time - allocation_start_time_); } } }; @@ -279,7 +279,7 @@ inline bool Heap::ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) co template <bool kGrow> inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size) { - size_t new_footprint = num_bytes_allocated_ + alloc_size; + size_t new_footprint = num_bytes_allocated_.LoadSequentiallyConsistent() + alloc_size; if (UNLIKELY(new_footprint > max_allowed_footprint_)) { if (UNLIKELY(new_footprint > growth_limit_)) { return true; diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index d37f2ad960..ea1ccdd665 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -116,6 +116,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max long_pause_log_threshold_(long_pause_log_threshold), long_gc_log_threshold_(long_gc_log_threshold), ignore_max_footprint_(ignore_max_footprint), + zygote_creation_lock_("zygote creation lock", kZygoteCreationLock), have_zygote_space_(false), large_object_threshold_(std::numeric_limits<size_t>::max()), // Starts out disabled. collector_type_running_(kCollectorTypeNone), @@ -292,7 +293,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max } // TODO: Count objects in the image space here. - num_bytes_allocated_ = 0; + num_bytes_allocated_.StoreRelaxed(0); // Default mark stack size in bytes. static const size_t default_mark_stack_size = 64 * KB; @@ -658,13 +659,13 @@ void Heap::RemoveSpace(space::Space* space) { void Heap::RegisterGCAllocation(size_t bytes) { if (this != nullptr) { - gc_memory_overhead_.FetchAndAdd(bytes); + gc_memory_overhead_.FetchAndAddSequentiallyConsistent(bytes); } } void Heap::RegisterGCDeAllocation(size_t bytes) { if (this != nullptr) { - gc_memory_overhead_.FetchAndSub(bytes); + gc_memory_overhead_.FetchAndSubSequentiallyConsistent(bytes); } } @@ -699,7 +700,8 @@ void Heap::DumpGcPerformanceInfo(std::ostream& os) { } collector->ResetMeasurements(); } - uint64_t allocation_time = static_cast<uint64_t>(total_allocation_time_) * kTimeAdjust; + uint64_t allocation_time = + static_cast<uint64_t>(total_allocation_time_.LoadRelaxed()) * kTimeAdjust; if (total_duration != 0) { const double total_seconds = static_cast<double>(total_duration / 1000) / 1000000.0; os << "Total time spent in GC: " << PrettyDuration(total_duration) << "\n"; @@ -719,7 +721,7 @@ void Heap::DumpGcPerformanceInfo(std::ostream& os) { } os << "Total mutator paused time: " << PrettyDuration(total_paused_time) << "\n"; os << "Total time waiting for GC to complete: " << PrettyDuration(total_wait_time_) << "\n"; - os << "Approximate GC data structures memory overhead: " << gc_memory_overhead_; + os << "Approximate GC data structures memory overhead: " << gc_memory_overhead_.LoadRelaxed(); BaseMutex::DumpAll(os); } @@ -1021,7 +1023,7 @@ void Heap::VerifyObjectBody(mirror::Object* obj) { return; } // Ignore early dawn of the universe verifications. 
- if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_.Load()) < 10 * KB)) { + if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_.LoadRelaxed()) < 10 * KB)) { return; } CHECK(IsAligned<kObjectAlignment>(obj)) << "Object isn't aligned: " << obj; @@ -1052,9 +1054,9 @@ void Heap::RecordFree(uint64_t freed_objects, int64_t freed_bytes) { // Use signed comparison since freed bytes can be negative when background compaction foreground // transitions occurs. This is caused by the moving objects from a bump pointer space to a // free list backed space typically increasing memory footprint due to padding and binning. - DCHECK_LE(freed_bytes, static_cast<int64_t>(num_bytes_allocated_.Load())); + DCHECK_LE(freed_bytes, static_cast<int64_t>(num_bytes_allocated_.LoadRelaxed())); // Note: This relies on 2s complement for handling negative freed_bytes. - num_bytes_allocated_.FetchAndSub(static_cast<ssize_t>(freed_bytes)); + num_bytes_allocated_.FetchAndSubSequentiallyConsistent(static_cast<ssize_t>(freed_bytes)); if (Runtime::Current()->HasStatsEnabled()) { RuntimeStats* thread_stats = Thread::Current()->GetStats(); thread_stats->freed_objects += freed_objects; @@ -1312,7 +1314,7 @@ void Heap::TransitionCollector(CollectorType collector_type) { VLOG(heap) << "TransitionCollector: " << static_cast<int>(collector_type_) << " -> " << static_cast<int>(collector_type); uint64_t start_time = NanoTime(); - uint32_t before_allocated = num_bytes_allocated_.Load(); + uint32_t before_allocated = num_bytes_allocated_.LoadSequentiallyConsistent(); ThreadList* tl = Runtime::Current()->GetThreadList(); Thread* self = Thread::Current(); ScopedThreadStateChange tsc(self, kWaitingPerformingGc); @@ -1390,7 +1392,7 @@ void Heap::TransitionCollector(CollectorType collector_type) { uint64_t duration = NanoTime() - start_time; GrowForUtilization(semi_space_collector_); FinishGC(self, collector::kGcTypeFull); - int32_t after_allocated = num_bytes_allocated_.Load(); + int32_t after_allocated = num_bytes_allocated_.LoadSequentiallyConsistent(); int32_t delta_allocated = before_allocated - after_allocated; LOG(INFO) << "Heap transition to " << process_state_ << " took " << PrettyDuration(duration) << " saved at least " << PrettySize(delta_allocated); @@ -1551,7 +1553,6 @@ void Heap::UnBindBitmaps() { void Heap::PreZygoteFork() { CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false); - static Mutex zygote_creation_lock_("zygote creation lock", kZygoteCreationLock); Thread* self = Thread::Current(); MutexLock mu(self, zygote_creation_lock_); // Try to see if we have any Zygote spaces. @@ -1866,7 +1867,7 @@ class VerifyReferenceVisitor { : heap_(heap), fail_count_(fail_count), verify_referent_(verify_referent) {} size_t GetFailureCount() const { - return fail_count_->Load(); + return fail_count_->LoadSequentiallyConsistent(); } void operator()(mirror::Class* klass, mirror::Reference* ref) const @@ -1903,7 +1904,7 @@ class VerifyReferenceVisitor { // Verify that the reference is live. return true; } - if (fail_count_->FetchAndAdd(1) == 0) { + if (fail_count_->FetchAndAddSequentiallyConsistent(1) == 0) { // Print message on only on first failure to prevent spam. 
LOG(ERROR) << "!!!!!!!!!!!!!!Heap corruption detected!!!!!!!!!!!!!!!!!!!"; } @@ -2019,7 +2020,7 @@ class VerifyObjectVisitor { } size_t GetFailureCount() const { - return fail_count_->Load(); + return fail_count_->LoadSequentiallyConsistent(); } private: @@ -2429,7 +2430,7 @@ bool Heap::IsMovableObject(const mirror::Object* obj) const { } void Heap::UpdateMaxNativeFootprint() { - size_t native_size = native_bytes_allocated_; + size_t native_size = native_bytes_allocated_.LoadRelaxed(); // TODO: Tune the native heap utilization to be a value other than the java heap utilization. size_t target_size = native_size / GetTargetHeapUtilization(); if (target_size > native_size + max_free_) { @@ -2701,21 +2702,22 @@ void Heap::RegisterNativeAllocation(JNIEnv* env, int bytes) { native_need_to_run_finalization_ = false; } // Total number of native bytes allocated. - native_bytes_allocated_.FetchAndAdd(bytes); - if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_gc_watermark_) { + size_t new_native_bytes_allocated = native_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes); + new_native_bytes_allocated += bytes; + if (new_native_bytes_allocated > native_footprint_gc_watermark_) { collector::GcType gc_type = have_zygote_space_ ? collector::kGcTypePartial : collector::kGcTypeFull; // The second watermark is higher than the gc watermark. If you hit this it means you are // allocating native objects faster than the GC can keep up with. - if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_limit_) { + if (new_native_bytes_allocated > native_footprint_limit_) { if (WaitForGcToComplete(kGcCauseForNativeAlloc, self) != collector::kGcTypeNone) { // Just finished a GC, attempt to run finalizers. RunFinalization(env); CHECK(!env->ExceptionCheck()); } // If we still are over the watermark, attempt a GC for alloc and run finalizers. 
- if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_limit_) { + if (new_native_bytes_allocated > native_footprint_limit_) { CollectGarbageInternal(gc_type, kGcCauseForNativeAlloc, false); RunFinalization(env); native_need_to_run_finalization_ = false; @@ -2737,7 +2739,7 @@ void Heap::RegisterNativeAllocation(JNIEnv* env, int bytes) { void Heap::RegisterNativeFree(JNIEnv* env, int bytes) { int expected_size, new_size; do { - expected_size = native_bytes_allocated_.Load(); + expected_size = native_bytes_allocated_.LoadRelaxed(); new_size = expected_size - bytes; if (UNLIKELY(new_size < 0)) { ScopedObjectAccess soa(env); @@ -2746,7 +2748,7 @@ void Heap::RegisterNativeFree(JNIEnv* env, int bytes) { "registered as allocated", bytes, expected_size).c_str()); break; } - } while (!native_bytes_allocated_.CompareAndSwap(expected_size, new_size)); + } while (!native_bytes_allocated_.CompareExchangeWeakRelaxed(expected_size, new_size)); } size_t Heap::GetTotalMemory() const { @@ -2781,9 +2783,9 @@ void Heap::AddRememberedSet(accounting::RememberedSet* remembered_set) { CHECK(remembered_set != nullptr); space::Space* space = remembered_set->GetSpace(); CHECK(space != nullptr); - CHECK(remembered_sets_.find(space) == remembered_sets_.end()); + CHECK(remembered_sets_.find(space) == remembered_sets_.end()) << space; remembered_sets_.Put(space, remembered_set); - CHECK(remembered_sets_.find(space) != remembered_sets_.end()); + CHECK(remembered_sets_.find(space) != remembered_sets_.end()) << space; } void Heap::RemoveRememberedSet(space::Space* space) { diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 6fe0dcf24e..887b17eb05 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -372,7 +372,7 @@ class Heap { // Returns the number of bytes currently allocated. size_t GetBytesAllocated() const { - return num_bytes_allocated_; + return num_bytes_allocated_.LoadSequentiallyConsistent(); } // Returns the number of objects currently allocated. @@ -408,7 +408,7 @@ class Heap { // Implements java.lang.Runtime.freeMemory. size_t GetFreeMemory() const { - return GetTotalMemory() - num_bytes_allocated_; + return GetTotalMemory() - num_bytes_allocated_.LoadSequentiallyConsistent(); } // get the space that corresponds to an object's address. Current implementation searches all @@ -778,6 +778,9 @@ class Heap { // useful for benchmarking since it reduces time spent in GC to a low %. const bool ignore_max_footprint_; + // Lock which guards zygote space creation. + Mutex zygote_creation_lock_; + // If we have a zygote space. bool have_zygote_space_; diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h index 497a61f273..71c295e147 100644 --- a/runtime/gc/space/bump_pointer_space-inl.h +++ b/runtime/gc/space/bump_pointer_space-inl.h @@ -48,8 +48,8 @@ inline mirror::Object* BumpPointerSpace::AllocThreadUnsafe(Thread* self, size_t end_ += num_bytes; *bytes_allocated = num_bytes; // Use the CAS free versions as an optimization. 
- objects_allocated_ = objects_allocated_ + 1; - bytes_allocated_ = bytes_allocated_ + num_bytes; + objects_allocated_.StoreRelaxed(objects_allocated_.LoadRelaxed() + 1); + bytes_allocated_.StoreRelaxed(bytes_allocated_.LoadRelaxed() + num_bytes); if (UNLIKELY(usable_size != nullptr)) { *usable_size = num_bytes; } @@ -76,8 +76,8 @@ inline mirror::Object* BumpPointerSpace::AllocNonvirtualWithoutAccounting(size_t inline mirror::Object* BumpPointerSpace::AllocNonvirtual(size_t num_bytes) { mirror::Object* ret = AllocNonvirtualWithoutAccounting(num_bytes); if (ret != nullptr) { - objects_allocated_.FetchAndAdd(1); - bytes_allocated_.FetchAndAdd(num_bytes); + objects_allocated_.FetchAndAddSequentiallyConsistent(1); + bytes_allocated_.FetchAndAddSequentiallyConsistent(num_bytes); } return ret; } diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc index fcd772bba5..fd0a92d56f 100644 --- a/runtime/gc/space/bump_pointer_space.cc +++ b/runtime/gc/space/bump_pointer_space.cc @@ -68,8 +68,8 @@ void BumpPointerSpace::Clear() { // Reset the end of the space back to the beginning, we move the end forward as we allocate // objects. SetEnd(Begin()); - objects_allocated_ = 0; - bytes_allocated_ = 0; + objects_allocated_.StoreRelaxed(0); + bytes_allocated_.StoreRelaxed(0); growth_end_ = Limit(); { MutexLock mu(Thread::Current(), block_lock_); @@ -204,7 +204,7 @@ accounting::ContinuousSpaceBitmap::SweepCallback* BumpPointerSpace::GetSweepCall uint64_t BumpPointerSpace::GetBytesAllocated() { // Start out pre-determined amount (blocks which are not being allocated into). - uint64_t total = static_cast<uint64_t>(bytes_allocated_.Load()); + uint64_t total = static_cast<uint64_t>(bytes_allocated_.LoadRelaxed()); Thread* self = Thread::Current(); MutexLock mu(self, *Locks::runtime_shutdown_lock_); MutexLock mu2(self, *Locks::thread_list_lock_); @@ -222,7 +222,7 @@ uint64_t BumpPointerSpace::GetBytesAllocated() { uint64_t BumpPointerSpace::GetObjectsAllocated() { // Start out pre-determined amount (blocks which are not being allocated into). 
- uint64_t total = static_cast<uint64_t>(objects_allocated_.Load()); + uint64_t total = static_cast<uint64_t>(objects_allocated_.LoadRelaxed()); Thread* self = Thread::Current(); MutexLock mu(self, *Locks::runtime_shutdown_lock_); MutexLock mu2(self, *Locks::thread_list_lock_); @@ -239,8 +239,8 @@ uint64_t BumpPointerSpace::GetObjectsAllocated() { } void BumpPointerSpace::RevokeThreadLocalBuffersLocked(Thread* thread) { - objects_allocated_.FetchAndAdd(thread->GetThreadLocalObjectsAllocated()); - bytes_allocated_.FetchAndAdd(thread->GetThreadLocalBytesAllocated()); + objects_allocated_.FetchAndAddSequentiallyConsistent(thread->GetThreadLocalObjectsAllocated()); + bytes_allocated_.FetchAndAddSequentiallyConsistent(thread->GetThreadLocalBytesAllocated()); thread->SetTlab(nullptr, nullptr); } diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc index 6ea94a91a2..45fee14519 100644 --- a/runtime/gc/space/image_space.cc +++ b/runtime/gc/space/image_space.cc @@ -239,7 +239,7 @@ ImageSpace* ImageSpace::Init(const char* image_filename, const char* image_locat *error_msg = StringPrintf("Failed to map image bitmap: %s", error_msg->c_str()); return nullptr; } - uint32_t bitmap_index = bitmap_index_.FetchAndAdd(1); + uint32_t bitmap_index = bitmap_index_.FetchAndAddSequentiallyConsistent(1); std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_filename, bitmap_index)); std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap( diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc index 046641362d..fb3a12efb5 100644 --- a/runtime/gc/space/zygote_space.cc +++ b/runtime/gc/space/zygote_space.cc @@ -115,7 +115,7 @@ void ZygoteSpace::SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* ar // Need to mark the card since this will update the mod-union table next GC cycle. card_table->MarkCard(ptrs[i]); } - zygote_space->objects_allocated_.FetchAndSub(num_ptrs); + zygote_space->objects_allocated_.FetchAndSubSequentiallyConsistent(num_ptrs); } } // namespace space diff --git a/runtime/gc/space/zygote_space.h b/runtime/gc/space/zygote_space.h index 50fc62b699..5d5fe76b74 100644 --- a/runtime/gc/space/zygote_space.h +++ b/runtime/gc/space/zygote_space.h @@ -65,7 +65,7 @@ class ZygoteSpace FINAL : public ContinuousMemMapAllocSpace { } uint64_t GetObjectsAllocated() { - return objects_allocated_; + return objects_allocated_.LoadSequentiallyConsistent(); } void Clear() OVERRIDE; diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc index 075d225bf4..2dbcc8058d 100644 --- a/runtime/instrumentation.cc +++ b/runtime/instrumentation.cc @@ -522,9 +522,9 @@ void Instrumentation::SetEntrypointsInstrumented(bool instrumented) { void Instrumentation::InstrumentQuickAllocEntryPoints() { // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racey and this code // should be guarded by a lock. 
- DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_.Load(), 0); + DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_.LoadSequentiallyConsistent(), 0); const bool enable_instrumentation = - quick_alloc_entry_points_instrumentation_counter_.FetchAndAdd(1) == 0; + quick_alloc_entry_points_instrumentation_counter_.FetchAndAddSequentiallyConsistent(1) == 0; if (enable_instrumentation) { SetEntrypointsInstrumented(true); } @@ -533,9 +533,9 @@ void Instrumentation::InstrumentQuickAllocEntryPoints() { void Instrumentation::UninstrumentQuickAllocEntryPoints() { // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racey and this code // should be guarded by a lock. - DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_.Load(), 0); + DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_.LoadSequentiallyConsistent(), 0); const bool disable_instrumentation = - quick_alloc_entry_points_instrumentation_counter_.FetchAndSub(1) == 1; + quick_alloc_entry_points_instrumentation_counter_.FetchAndSubSequentiallyConsistent(1) == 1; if (disable_instrumentation) { SetEntrypointsInstrumented(false); } diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc index e0f9e5f958..9a274f618a 100644 --- a/runtime/interpreter/interpreter_goto_table_impl.cc +++ b/runtime/interpreter/interpreter_goto_table_impl.cc @@ -234,9 +234,9 @@ JValue ExecuteGotoImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* HANDLE_INSTRUCTION_END(); HANDLE_INSTRUCTION_START(MOVE_EXCEPTION) { - Throwable* exception = self->GetException(NULL); - self->ClearException(); + Throwable* exception = self->GetException(nullptr); shadow_frame.SetVRegReference(inst->VRegA_11x(inst_data), exception); + self->ClearException(); ADVANCE(1); } HANDLE_INSTRUCTION_END(); diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index c0275f636a..68759ad65a 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -163,9 +163,9 @@ JValue ExecuteSwitchImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem break; case Instruction::MOVE_EXCEPTION: { PREAMBLE(); - Throwable* exception = self->GetException(NULL); - self->ClearException(); + Throwable* exception = self->GetException(nullptr); shadow_frame.SetVRegReference(inst->VRegA_11x(inst_data), exception); + self->ClearException(); inst = inst->Next_1xx(); break; } diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc index 04905a57ca..69e5a84fed 100644 --- a/runtime/mirror/object.cc +++ b/runtime/mirror/object.cc @@ -139,10 +139,10 @@ int32_t Object::GenerateIdentityHashCode() { static AtomicInteger seed(987654321 + std::time(nullptr)); int32_t expected_value, new_value; do { - expected_value = static_cast<uint32_t>(seed.Load()); + expected_value = static_cast<uint32_t>(seed.LoadRelaxed()); new_value = expected_value * 1103515245 + 12345; } while ((expected_value & LockWord::kHashMask) == 0 || - !seed.CompareAndSwap(expected_value, new_value)); + !seed.CompareExchangeWeakRelaxed(expected_value, new_value)); return expected_value & LockWord::kHashMask; } diff --git a/runtime/monitor.cc b/runtime/monitor.cc index 0beb6514ee..f783edbfc3 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -99,12 +99,12 @@ Monitor::Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_ int32_t Monitor::GetHashCode() { while (!HasHashCode()) { - if 
(hash_code_.CompareAndSwap(0, mirror::Object::GenerateIdentityHashCode())) { + if (hash_code_.CompareExchangeWeakRelaxed(0, mirror::Object::GenerateIdentityHashCode())) { break; } } DCHECK(HasHashCode()); - return hash_code_.Load(); + return hash_code_.LoadRelaxed(); } bool Monitor::Install(Thread* self) { @@ -119,7 +119,7 @@ bool Monitor::Install(Thread* self) { break; } case LockWord::kHashCode: { - CHECK_EQ(hash_code_, static_cast<int32_t>(lw.GetHashCode())); + CHECK_EQ(hash_code_.LoadRelaxed(), static_cast<int32_t>(lw.GetHashCode())); break; } case LockWord::kFatLocked: { diff --git a/runtime/monitor.h b/runtime/monitor.h index bc5d2e4bb9..bc1b2ed4eb 100644 --- a/runtime/monitor.h +++ b/runtime/monitor.h @@ -107,7 +107,7 @@ class Monitor { bool IsLocked() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); bool HasHashCode() const { - return hash_code_.Load() != 0; + return hash_code_.LoadRelaxed() != 0; } MonitorId GetMonitorId() const { diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc index 69b05f41c4..d9c9b5937d 100644 --- a/runtime/native/dalvik_system_VMRuntime.cc +++ b/runtime/native/dalvik_system_VMRuntime.cc @@ -155,6 +155,21 @@ static jstring VMRuntime_vmLibrary(JNIEnv* env, jobject) { return env->NewStringUTF(kIsDebugBuild ? "libartd.so" : "libart.so"); } +static jstring VMRuntime_vmInstructionSet(JNIEnv* env, jobject) { + InstructionSet isa = Runtime::Current()->GetInstructionSet(); + const char* isa_string = GetInstructionSetString(isa); + return env->NewStringUTF(isa_string); +} + +static jboolean VMRuntime_is64Bit(JNIEnv* env, jobject) { + bool is64BitMode = (sizeof(void*) == sizeof(uint64_t)); + return is64BitMode ? JNI_TRUE : JNI_FALSE; +} + +static jboolean VMRuntime_isCheckJniEnabled(JNIEnv* env, jobject) { + return Runtime::Current()->GetJavaVM()->check_jni ? JNI_TRUE : JNI_FALSE; +} + static void VMRuntime_setTargetSdkVersionNative(JNIEnv* env, jobject, jint targetSdkVersion) { // This is the target SDK version of the app we're about to run. It is intended that this a place // where workarounds can be enabled. @@ -529,6 +544,9 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(VMRuntime, trimHeap, "()V"), NATIVE_METHOD(VMRuntime, vmVersion, "()Ljava/lang/String;"), NATIVE_METHOD(VMRuntime, vmLibrary, "()Ljava/lang/String;"), + NATIVE_METHOD(VMRuntime, vmInstructionSet, "()Ljava/lang/String;"), + NATIVE_METHOD(VMRuntime, is64Bit, "!()Z"), + NATIVE_METHOD(VMRuntime, isCheckJniEnabled, "!()Z"), NATIVE_METHOD(VMRuntime, preloadDexCaches, "()V"), NATIVE_METHOD(VMRuntime, registerAppInfo, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V"), }; diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc index 2987393bf2..4330d275a9 100644 --- a/runtime/parsed_options.cc +++ b/runtime/parsed_options.cc @@ -533,7 +533,7 @@ bool ParsedOptions::Parse(const Runtime::Options& options, bool ignore_unrecogni Trace::SetDefaultClockSource(kProfilerClockSourceWall); } else if (option == "-Xprofile:dualclock") { Trace::SetDefaultClockSource(kProfilerClockSourceDual); - } else if (StartsWith(option, "-Xprofile:")) { + } else if (StartsWith(option, "-Xprofile-filename:")) { if (!ParseStringAfterChar(option, ':', &profile_output_filename_)) { return false; } @@ -786,7 +786,7 @@ void ParsedOptions::Usage(const char* fmt, ...) 
{ UsageMessage(stream, " -Xmethod-trace\n"); UsageMessage(stream, " -Xmethod-trace-file:filename"); UsageMessage(stream, " -Xmethod-trace-file-size:integervalue\n"); - UsageMessage(stream, " -Xprofile=filename\n"); + UsageMessage(stream, " -Xprofile-filename:filename\n"); UsageMessage(stream, " -Xprofile-period:integervalue\n"); UsageMessage(stream, " -Xprofile-duration:integervalue\n"); UsageMessage(stream, " -Xprofile-interval:integervalue\n"); diff --git a/runtime/thread_pool_test.cc b/runtime/thread_pool_test.cc index c1a1ad73e0..292c94f64b 100644 --- a/runtime/thread_pool_test.cc +++ b/runtime/thread_pool_test.cc @@ -69,7 +69,7 @@ TEST_F(ThreadPoolTest, CheckRun) { // Wait for tasks to complete. thread_pool.Wait(self, true, false); // Make sure that we finished all the work. - EXPECT_EQ(num_tasks, count); + EXPECT_EQ(num_tasks, count.LoadSequentiallyConsistent()); } TEST_F(ThreadPoolTest, StopStart) { @@ -82,7 +82,7 @@ TEST_F(ThreadPoolTest, StopStart) { } usleep(200); // Check that no threads started prematurely. - EXPECT_EQ(0, count); + EXPECT_EQ(0, count.LoadSequentiallyConsistent()); // Signal the threads to start processing tasks. thread_pool.StartWorkers(self); usleep(200); @@ -91,10 +91,11 @@ TEST_F(ThreadPoolTest, StopStart) { thread_pool.AddTask(self, new CountTask(&bad_count)); usleep(200); // Ensure that the task added after the workers were stopped doesn't get run. - EXPECT_EQ(0, bad_count); + EXPECT_EQ(0, bad_count.LoadSequentiallyConsistent()); // Allow tasks to finish up and delete themselves. thread_pool.StartWorkers(self); - while (count.Load() != num_tasks && bad_count.Load() != 1) { + while (count.LoadSequentiallyConsistent() != num_tasks && + bad_count.LoadSequentiallyConsistent() != 1) { usleep(200); } thread_pool.StopWorkers(self); @@ -135,7 +136,7 @@ TEST_F(ThreadPoolTest, RecursiveTest) { thread_pool.AddTask(self, new TreeTask(&thread_pool, &count, depth)); thread_pool.StartWorkers(self); thread_pool.Wait(self, true, false); - EXPECT_EQ((1 << depth) - 1, count); + EXPECT_EQ((1 << depth) - 1, count.LoadSequentiallyConsistent()); } } // namespace art diff --git a/test/Android.mk b/test/Android.mk index 71f6be16a4..8caa033b98 100644 --- a/test/Android.mk +++ b/test/Android.mk @@ -116,7 +116,7 @@ define declare-test-art-oat-targets-impl test-art-target-oat-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX): $(ART_TEST_OUT)/oat-test-dex-$(1).jar test-art-target-sync adb shell touch $(ART_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@ adb shell rm $(ART_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@ - adb shell sh -c "/system/bin/dalvikvm$($(2)ART_TARGET_BINARY_SUFFIX) $(DALVIKVM_FLAGS) -XXlib:libartd.so -Ximage:$(ART_TEST_DIR)/core.art -classpath $(ART_TEST_DIR)/oat-test-dex-$(1).jar -Djava.library.path=$(ART_TEST_DIR)/$(TARGET_$(2)ARCH) $(1) && touch $(ART_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@" + adb shell sh -c "/system/bin/dalvikvm$($(2)ART_PHONY_TEST_TARGET_SUFFIX) $(DALVIKVM_FLAGS) -XXlib:libartd.so -Ximage:$(ART_TEST_DIR)/core.art -classpath $(ART_TEST_DIR)/oat-test-dex-$(1).jar -Djava.library.path=$(ART_TEST_DIR)/$(TARGET_$(2)ARCH) $(1) && touch $(ART_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@" $(hide) (adb pull $(ART_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@ /tmp/ && echo $$@ PASSED) || (echo $$@ FAILED && exit 1) $(hide) rm /tmp/$$@ endef |
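
The RegisterNativeAllocation hunk in runtime/gc/heap.cc stops re-reading native_bytes_allocated_ for each watermark check and instead derives the new total from the value returned by the fetch-and-add, so every comparison sees one consistent snapshot rather than racing with other threads. A minimal sketch of the pattern with std::atomic; the watermark constants and the printed messages are made up for illustration and do not correspond to real ART values.

    #include <atomic>
    #include <cstddef>
    #include <iostream>

    // Hypothetical watermarks, loosely modeled on native_footprint_gc_watermark_
    // and native_footprint_limit_.
    constexpr size_t kGcWatermark = 4 * 1024;
    constexpr size_t kHardLimit = 8 * 1024;

    std::atomic<size_t> native_bytes_allocated{0};

    void RegisterNativeAllocation(size_t bytes) {
      // fetch_add returns the value *before* the addition, so the new total is
      // the returned value plus |bytes| -- the same "+= bytes" as in the patch.
      size_t new_total = native_bytes_allocated.fetch_add(bytes) + bytes;
      if (new_total > kGcWatermark) {
        std::cout << "would request a GC at " << new_total << " native bytes\n";
        if (new_total > kHardLimit) {
          std::cout << "would block and run finalizers at " << new_total << " bytes\n";
        }
      }
    }

    int main() {
      for (int i = 0; i < 6; ++i) {
        RegisterNativeAllocation(2 * 1024);
      }
      return 0;
    }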
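
Several hunks (RegisterNativeFree, Object::GenerateIdentityHashCode, Monitor::GetHashCode) rewrite CompareAndSwap retry loops in terms of a weak, relaxed compare-exchange. A weak compare-exchange may fail spuriously, which is harmless when the caller already loops, and it can compile to cheaper code on LL/SC architectures such as ARM. The sketch below mirrors the RegisterNativeFree-style loop using only std::atomic; unlike the ART loop, which reloads with LoadRelaxed each iteration, compare_exchange_weak refreshes |expected| on failure, so no separate reload is needed.

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    std::atomic<int32_t> native_bytes{1024};

    // Recompute the desired value from a fresh snapshot on every retry.
    bool RegisterNativeFree(int32_t bytes) {
      int32_t expected = native_bytes.load(std::memory_order_relaxed);
      int32_t desired;
      do {
        desired = expected - bytes;
        if (desired < 0) {
          return false;  // More freed than was ever registered as allocated.
        }
        // On failure compare_exchange_weak stores the current value back into
        // |expected|, so the next iteration works on an up-to-date snapshot.
      } while (!native_bytes.compare_exchange_weak(expected, desired,
                                                   std::memory_order_relaxed));
      return true;
    }

    int main() {
      assert(RegisterNativeFree(512));              // 1024 -> 512.
      assert(!RegisterNativeFree(4096));            // Would go negative: rejected.
      assert(native_bytes.load(std::memory_order_relaxed) == 512);
      return 0;
    }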
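
The heap.h/heap.cc hunks turn the zygote creation lock from a function-local static inside PreZygoteFork into a Heap member initialized in the constructor, tying its lifetime and lock-level registration to the owning object rather than to the first call. A minimal sketch of the same refactoring with std::mutex; the Heap shown here is a toy stand-in, not the real class.

    #include <iostream>
    #include <mutex>

    // Before the patch, PreZygoteFork() owned a function-local static mutex that
    // was created lazily on first use. After the patch, the lock is a member
    // constructed with the Heap itself.
    class Heap {
     public:
      Heap() : have_zygote_space_(false) {}

      void PreZygoteFork() {
        std::lock_guard<std::mutex> lock(zygote_creation_lock_);
        if (have_zygote_space_) {
          return;  // Zygote space already created; nothing to do.
        }
        have_zygote_space_ = true;
        std::cout << "zygote space created\n";
      }

     private:
      std::mutex zygote_creation_lock_;  // Guards zygote space creation.
      bool have_zygote_space_;
    };

    int main() {
      Heap heap;
      heap.PreZygoteFork();
      heap.PreZygoteFork();  // Second call is a no-op under the same lock.
      return 0;
    }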
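
Both interpreter hunks move self->ClearException() to after the exception has been stored into the shadow frame register. The likely motivation is GC safety: between the two calls the only reference to the Throwable would otherwise be a raw local pointer, which a concurrent or moving collector cannot treat as a root; keeping it as the thread's pending exception until it is rooted in the frame closes that window. The sketch below only illustrates the ordering; Thread, ShadowFrame and Throwable here are hypothetical stand-ins, not the real runtime types.

    #include <iostream>
    #include <string>

    struct Throwable { std::string message; };

    struct Thread {
      Throwable* exception = nullptr;
      Throwable* GetException() { return exception; }
      void ClearException() { exception = nullptr; }
    };

    struct ShadowFrame {
      Throwable* vreg_ref = nullptr;
      void SetVRegReference(Throwable* ref) { vreg_ref = ref; }
    };

    int main() {
      Throwable pending{"boom"};
      Thread self;
      self.exception = &pending;
      ShadowFrame frame;

      // Patched order: publish the exception into the frame first, then clear it
      // from the thread. At every point the Throwable is reachable from at least
      // one root (the thread's pending exception or the frame's vreg), so the
      // interpreter never holds the only reference in a bare local pointer.
      Throwable* exception = self.GetException();
      frame.SetVRegReference(exception);
      self.ClearException();

      std::cout << frame.vreg_ref->message << "\n";
      return 0;
    }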