75 files changed, 2385 insertions(+), 838 deletions(-)
diff --git a/Android.mk b/Android.mk index 3324458bf5..312507f957 100644 --- a/Android.mk +++ b/Android.mk @@ -401,14 +401,14 @@ endif .PHONY: dump-oat-core-target ifeq ($(ART_BUILD_TARGET),true) dump-oat-core-target: $(TARGET_CORE_IMG_OUT) $(OATDUMP) - $(OATDUMP) --image=$(TARGET_CORE_IMG_OUT) --output=$(ART_DUMP_OAT_PATH)/core.target.oatdump.txt + $(OATDUMP) --image=$(TARGET_CORE_IMG_LOCATION) --output=$(ART_DUMP_OAT_PATH)/core.target.oatdump.txt --instruction-set=$(TARGET_ARCH) @echo Output in $(ART_DUMP_OAT_PATH)/core.target.oatdump.txt endif .PHONY: dump-oat-boot ifeq ($(ART_BUILD_TARGET_NDEBUG),true) -dump-oat-boot: $(DEFAULT_DEX_PREOPT_BUILT_IMAGE) $(OATDUMP) - $(OATDUMP) --image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --output=$(ART_DUMP_OAT_PATH)/boot.oatdump.txt +dump-oat-boot: $(DEFAULT_DEX_PREOPT_BUILT_IMAGE_FILENAME) $(OATDUMP) + $(OATDUMP) --image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE_LOCATION) --output=$(ART_DUMP_OAT_PATH)/boot.oatdump.txt --instruction-set=$(TARGET_ARCH) @echo Output in $(ART_DUMP_OAT_PATH)/boot.oatdump.txt endif diff --git a/build/Android.common.mk b/build/Android.common.mk index ae54efb061..83c536f667 100644 --- a/build/Android.common.mk +++ b/build/Android.common.mk @@ -402,5 +402,6 @@ ifdef TARGET_2ND_ARCH endif HOST_CORE_IMG_LOCATION := $(HOST_OUT_JAVA_LIBRARIES)/core.art +TARGET_CORE_IMG_LOCATION := $(ART_TEST_OUT)/core.art endif # ANDROID_COMMON_MK diff --git a/build/Android.executable.mk b/build/Android.executable.mk index 3c33975a4d..a186e8511f 100644 --- a/build/Android.executable.mk +++ b/build/Android.executable.mk @@ -99,8 +99,8 @@ define build-art-executable LOCAL_MULTILIB := $$(art_multilib) endif + include external/libcxx/libcxx.mk ifeq ($$(art_target_or_host),target) - include external/libcxx/libcxx.mk include $(BUILD_EXECUTABLE) ART_TARGET_EXECUTABLES := $(ART_TARGET_EXECUTABLES) $(TARGET_OUT_EXECUTABLES)/$$(LOCAL_MODULE) else # host diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 952f79a489..20e6aad5b1 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -82,6 +82,7 @@ COMPILER_GTEST_COMMON_SRC_FILES := \ compiler/optimizing/linearize_test.cc \ compiler/optimizing/liveness_test.cc \ compiler/optimizing/live_ranges_test.cc \ + compiler/optimizing/parallel_move_test.cc \ compiler/optimizing/pretty_printer_test.cc \ compiler/optimizing/ssa_test.cc \ compiler/output_stream_test.cc \ @@ -182,6 +183,7 @@ define build-art-test endif LOCAL_CFLAGS := $(ART_TEST_CFLAGS) + include external/libcxx/libcxx.mk ifeq ($$(art_target_or_host),target) LOCAL_CLANG := $(ART_TARGET_CLANG) LOCAL_CFLAGS += $(ART_TARGET_CFLAGS) $(ART_TARGET_DEBUG_CFLAGS) @@ -191,7 +193,6 @@ define build-art-test LOCAL_MODULE_PATH_32 := $(ART_NATIVETEST_OUT)/$(ART_TARGET_ARCH_32) LOCAL_MODULE_PATH_64 := $(ART_NATIVETEST_OUT)/$(ART_TARGET_ARCH_64) LOCAL_MULTILIB := both - include external/libcxx/libcxx.mk include $(BUILD_EXECUTABLE) ART_TARGET_GTEST_EXECUTABLES$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_NATIVETEST_OUT)/$(TARGET_ARCH)/$$(LOCAL_MODULE) @@ -216,7 +217,7 @@ $$(art_gtest_target): $$(art_gtest_target)$(ART_PHONY_TEST_TARGET_SUFFIX) LOCAL_STATIC_LIBRARIES += libcutils libvixl ifneq ($(WITHOUT_HOST_CLANG),true) # GCC host compiled tests fail with this linked, presumably due to destructors that run. 
- LOCAL_STATIC_LIBRARIES += libgtest_host + LOCAL_STATIC_LIBRARIES += libgtest_libc++_host endif LOCAL_LDLIBS += -lpthread -ldl LOCAL_IS_HOST_MODULE := true diff --git a/build/Android.libarttest.mk b/build/Android.libarttest.mk index 696532684e..9e5f3d6e4e 100644 --- a/build/Android.libarttest.mk +++ b/build/Android.libarttest.mk @@ -46,6 +46,7 @@ define build-libarttest LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/build/Android.common.mk LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/build/Android.libarttest.mk + include external/libcxx/libcxx.mk ifeq ($$(art_target_or_host),target) LOCAL_CLANG := $(ART_TARGET_CLANG) LOCAL_CFLAGS := $(ART_TARGET_CFLAGS) $(ART_TARGET_DEBUG_CFLAGS) @@ -56,13 +57,12 @@ define build-libarttest LOCAL_MODULE_PATH_32 := $(ART_TEST_OUT)/$(ART_TARGET_ARCH_32) LOCAL_MODULE_PATH_64 := $(ART_TEST_OUT)/$(ART_TARGET_ARCH_64) LOCAL_MODULE_TARGET_ARCH := $(ART_SUPPORTED_ARCH) - include external/libcxx/libcxx.mk include $(BUILD_SHARED_LIBRARY) else # host LOCAL_CLANG := $(ART_HOST_CLANG) LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS) LOCAL_STATIC_LIBRARIES := libcutils - LOCAL_LDLIBS := -ldl -lpthread + LOCAL_LDLIBS += -ldl -lpthread ifeq ($(HOST_OS),linux) LOCAL_LDLIBS += -lrt endif diff --git a/compiler/Android.mk b/compiler/Android.mk index cb900eac61..274678ad67 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -59,8 +59,8 @@ LIBART_COMPILER_SRC_FILES := \ dex/mir_field_info.cc \ dex/mir_method_info.cc \ dex/mir_optimization.cc \ - dex/pass_driver.cc \ dex/bb_optimizations.cc \ + dex/pass_driver_me.cc \ dex/bit_vector_block_iterator.cc \ dex/frontend.cc \ dex/mir_graph.cc \ @@ -83,8 +83,10 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/code_generator_arm.cc \ optimizing/code_generator_x86.cc \ optimizing/graph_visualizer.cc \ + optimizing/locations.cc \ optimizing/nodes.cc \ optimizing/optimizing_compiler.cc \ + optimizing/parallel_move_resolver.cc \ optimizing/ssa_builder.cc \ optimizing/ssa_liveness_analysis.cc \ trampolines/trampoline_compiler.cc \ @@ -194,8 +196,8 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT LOCAL_GENERATED_SOURCES += $$(ENUM_OPERATOR_OUT_GEN) LOCAL_CFLAGS := $$(LIBART_COMPILER_CFLAGS) + include external/libcxx/libcxx.mk ifeq ($$(art_target_or_host),target) - include external/libcxx/libcxx.mk LOCAL_CLANG := $(ART_TARGET_CLANG) LOCAL_CFLAGS += $(ART_TARGET_CFLAGS) else # host @@ -247,7 +249,7 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime ifeq ($$(art_target_or_host),host) - LOCAL_LDLIBS := -ldl -lpthread + LOCAL_LDLIBS += -ldl -lpthread endif LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc index abfa7a7eb7..1852f805f4 100644 --- a/compiler/dex/bb_optimizations.cc +++ b/compiler/dex/bb_optimizations.cc @@ -23,7 +23,13 @@ namespace art { /* * Code Layout pass implementation start. 
*/ -bool CodeLayout::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { +bool CodeLayout::Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); cUnit->mir_graph->LayoutBlocks(bb); // No need of repeating, so just return false. return false; @@ -32,13 +38,22 @@ bool CodeLayout::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { /* * SSATransformation pass implementation start. */ -bool SSATransformation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { +bool SSATransformation::Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); cUnit->mir_graph->InsertPhiNodeOperands(bb); // No need of repeating, so just return false. return false; } -void SSATransformation::End(CompilationUnit* cUnit) const { +void SSATransformation::End(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); // Verify the dataflow information after the pass. if (cUnit->enable_debug & (1 << kDebugVerifyDataflow)) { cUnit->mir_graph->VerifyDataflow(); @@ -48,7 +63,13 @@ void SSATransformation::End(CompilationUnit* cUnit) const { /* * ConstantPropagation pass implementation start */ -bool ConstantPropagation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { +bool ConstantPropagation::Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); cUnit->mir_graph->DoConstantPropagation(bb); // No need of repeating, so just return false. return false; @@ -57,7 +78,10 @@ bool ConstantPropagation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb /* * MethodUseCount pass implementation start. */ -bool MethodUseCount::Gate(const CompilationUnit* cUnit) const { +bool MethodUseCount::Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); // First initialize the data. cUnit->mir_graph->InitializeMethodUses(); @@ -67,7 +91,13 @@ bool MethodUseCount::Gate(const CompilationUnit* cUnit) const { return res; } -bool MethodUseCount::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { +bool MethodUseCount::Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); cUnit->mir_graph->CountUses(bb); // No need of repeating, so just return false. return false; @@ -76,7 +106,13 @@ bool MethodUseCount::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) con /* * BasicBlock Combine pass implementation start. 
*/ -bool BBCombine::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { +bool BBCombine::Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); cUnit->mir_graph->CombineBlocks(bb); // No need of repeating, so just return false. @@ -86,7 +122,10 @@ bool BBCombine::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { /* * BasicBlock Optimization pass implementation start. */ -void BBOptimizations::Start(CompilationUnit* cUnit) const { +void BBOptimizations::Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); /* * This pass has a different ordering depEnding on the suppress exception, * so do the pass here for now: diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h index 6d500a56ec..43dcdf4504 100644 --- a/compiler/dex/bb_optimizations.h +++ b/compiler/dex/bb_optimizations.h @@ -18,7 +18,7 @@ #define ART_COMPILER_DEX_BB_OPTIMIZATIONS_H_ #include "compiler_internals.h" -#include "pass.h" +#include "pass_me.h" namespace art { @@ -26,16 +26,22 @@ namespace art { * @class CacheFieldLoweringInfo * @brief Cache the lowering info for fields used by IGET/IPUT/SGET/SPUT insns. */ -class CacheFieldLoweringInfo : public Pass { +class CacheFieldLoweringInfo : public PassME { public: - CacheFieldLoweringInfo() : Pass("CacheFieldLoweringInfo", kNoNodes) { + CacheFieldLoweringInfo() : PassME("CacheFieldLoweringInfo", kNoNodes) { } - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->DoCacheFieldLoweringInfo(); } - bool Gate(const CompilationUnit *cUnit) const { + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); return cUnit->mir_graph->HasFieldAccess(); } }; @@ -44,16 +50,22 @@ class CacheFieldLoweringInfo : public Pass { * @class CacheMethodLoweringInfo * @brief Cache the lowering info for methods called by INVOKEs. */ -class CacheMethodLoweringInfo : public Pass { +class CacheMethodLoweringInfo : public PassME { public: - CacheMethodLoweringInfo() : Pass("CacheMethodLoweringInfo", kNoNodes) { + CacheMethodLoweringInfo() : PassME("CacheMethodLoweringInfo", kNoNodes) { } - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->DoCacheMethodLoweringInfo(); } - bool Gate(const CompilationUnit *cUnit) const { + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); return cUnit->mir_graph->HasInvokes(); } }; @@ -62,26 +74,41 @@ class CacheMethodLoweringInfo : public Pass { * @class CallInlining * @brief Perform method inlining pass. 
*/ -class CallInlining : public Pass { +class CallInlining : public PassME { public: - CallInlining() : Pass("CallInlining") { + CallInlining() : PassME("CallInlining") { } - bool Gate(const CompilationUnit* cUnit) const { + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); return cUnit->mir_graph->InlineCallsGate(); } - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->InlineCallsStart(); } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { + bool Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); cUnit->mir_graph->InlineCalls(bb); // No need of repeating, so just return false. return false; } - void End(CompilationUnit* cUnit) const { + void End(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->InlineCallsEnd(); } }; @@ -90,48 +117,57 @@ class CallInlining : public Pass { * @class CodeLayout * @brief Perform the code layout pass. */ -class CodeLayout : public Pass { +class CodeLayout : public PassME { public: - CodeLayout() : Pass("CodeLayout", "2_post_layout_cfg") { + CodeLayout() : PassME("CodeLayout", "2_post_layout_cfg") { } - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->VerifyDataflow(); } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const; + bool Worker(const PassDataHolder* data) const; }; /** * @class SSATransformation * @brief Perform an SSA representation pass on the CompilationUnit. */ -class SSATransformation : public Pass { +class SSATransformation : public PassME { public: - SSATransformation() : Pass("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") { + SSATransformation() : PassME("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") { } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const; + bool Worker(const PassDataHolder* data) const; - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->InitializeSSATransformation(); } - void End(CompilationUnit* cUnit) const; + void End(const PassDataHolder* data) const; }; /** * @class ConstantPropagation * @brief Perform a constant propagation pass. 
*/ -class ConstantPropagation : public Pass { +class ConstantPropagation : public PassME { public: - ConstantPropagation() : Pass("ConstantPropagation") { + ConstantPropagation() : PassME("ConstantPropagation") { } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const; + bool Worker(const PassDataHolder* data) const; - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->InitializeConstantPropagation(); } }; @@ -140,12 +176,15 @@ class ConstantPropagation : public Pass { * @class InitRegLocations * @brief Initialize Register Locations. */ -class InitRegLocations : public Pass { +class InitRegLocations : public PassME { public: - InitRegLocations() : Pass("InitRegLocation", kNoNodes) { + InitRegLocations() : PassME("InitRegLocation", kNoNodes) { } - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->InitRegLocations(); } }; @@ -154,53 +193,77 @@ class InitRegLocations : public Pass { * @class MethodUseCount * @brief Count the register uses of the method */ -class MethodUseCount : public Pass { +class MethodUseCount : public PassME { public: - MethodUseCount() : Pass("UseCount") { + MethodUseCount() : PassME("UseCount") { } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const; + bool Worker(const PassDataHolder* data) const; - bool Gate(const CompilationUnit* cUnit) const; + bool Gate(const PassDataHolder* data) const; }; /** * @class NullCheckEliminationAndTypeInference * @brief Null check elimination and type inference. 
*/ -class NullCheckEliminationAndTypeInference : public Pass { +class NullCheckEliminationAndTypeInference : public PassME { public: NullCheckEliminationAndTypeInference() - : Pass("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") { + : PassME("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") { } - void Start(CompilationUnit* cUnit) const { + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->EliminateNullChecksAndInferTypesStart(); } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { + bool Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); return cUnit->mir_graph->EliminateNullChecksAndInferTypes(bb); } - void End(CompilationUnit* cUnit) const { + void End(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->EliminateNullChecksAndInferTypesEnd(); } }; -class ClassInitCheckElimination : public Pass { +class ClassInitCheckElimination : public PassME { public: - ClassInitCheckElimination() : Pass("ClInitCheckElimination", kRepeatingPreOrderDFSTraversal) { + ClassInitCheckElimination() : PassME("ClInitCheckElimination", kRepeatingPreOrderDFSTraversal) { } - bool Gate(const CompilationUnit* cUnit) const { + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); return cUnit->mir_graph->EliminateClassInitChecksGate(); } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const { + bool Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* cUnit = pass_me_data_holder->c_unit; + DCHECK(cUnit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); return cUnit->mir_graph->EliminateClassInitChecks(bb); } - void End(CompilationUnit* cUnit) const { + void End(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); cUnit->mir_graph->EliminateClassInitChecksEnd(); } }; @@ -209,32 +272,38 @@ class ClassInitCheckElimination : public Pass { * @class NullCheckEliminationAndTypeInference * @brief Null check elimination and type inference. 
*/ -class BBCombine : public Pass { +class BBCombine : public PassME { public: - BBCombine() : Pass("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") { + BBCombine() : PassME("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") { } - bool Gate(const CompilationUnit* cUnit) const { + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); return ((cUnit->disable_opt & (1 << kSuppressExceptionEdges)) != 0); } - bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const; + bool Worker(const PassDataHolder* data) const; }; /** * @class BasicBlock Optimizations * @brief Any simple BasicBlock optimization can be put here. */ -class BBOptimizations : public Pass { +class BBOptimizations : public PassME { public: - BBOptimizations() : Pass("BBOptimizations", kNoNodes, "5_post_bbo_cfg") { + BBOptimizations() : PassME("BBOptimizations", kNoNodes, "5_post_bbo_cfg") { } - bool Gate(const CompilationUnit* cUnit) const { + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(cUnit != nullptr); return ((cUnit->disable_opt & (1 << kBBOpt)) == 0); } - void Start(CompilationUnit* cUnit) const; + void Start(const PassDataHolder* data) const; }; } // namespace art diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h index b45d6a45ae..62973afc38 100644 --- a/compiler/dex/dataflow_iterator.h +++ b/compiler/dex/dataflow_iterator.h @@ -326,6 +326,81 @@ namespace art { GrowableArray<BasicBlock*>::Iterator all_nodes_iterator_; /**< @brief The list of all the nodes */ }; + /** + * @class TopologicalSortIterator + * @brief Used to perform a Topological Sort Iteration of a MIRGraph. + */ + class TopologicalSortIterator : public DataflowIterator { + public: + /** + * @brief The constructor, using all of the reachable blocks of the MIRGraph. + * @param mir_graph The MIRGraph considered. + */ + explicit TopologicalSortIterator(MIRGraph* mir_graph) + : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder() != nullptr ? + mir_graph->GetTopologicalSortOrder()->Size() : 0) { + // Extra setup for TopologicalSortIterator. + idx_ = start_idx_; + block_id_list_ = mir_graph->GetTopologicalSortOrder(); + + if (mir_graph->GetTopologicalSortOrder() == nullptr) { + /* Compute the topological order */ + mir_graph->ComputeTopologicalSortOrder(); + } + } + + /** + * @brief Get the next BasicBlock depending on iteration order. + * @param had_change did the user of the iteration change the previous BasicBlock. + * @return the next BasicBlock following the iteration order, 0 if finished. + */ + virtual BasicBlock* Next(bool had_change = false) { + // Update changed: if had_changed is true, we remember it for the whole iteration. + changed_ |= had_change; + + return ForwardSingleNext(); + } + }; + + /** + * @class RepeatingTopologicalSortIterator + * @brief Used to perform a Topological Sort Iteration of a MIRGraph. + * @details If there is a change during an iteration, the iteration starts over at the end of the + * iteration. + */ + class RepeatingTopologicalSortIterator : public DataflowIterator { + public: + /** + * @brief The constructor, using all of the reachable blocks of the MIRGraph. + * @param mir_graph The MIRGraph considered. 
+ */ + explicit RepeatingTopologicalSortIterator(MIRGraph* mir_graph) + : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder() != nullptr ? + mir_graph->GetTopologicalSortOrder()->Size() : 0) { + // Extra setup for RepeatingTopologicalSortIterator. + idx_ = start_idx_; + block_id_list_ = mir_graph->GetTopologicalSortOrder(); + + if (mir_graph->GetTopologicalSortOrder() == nullptr) { + /* Compute the topological order */ + mir_graph->ComputeTopologicalSortOrder(); + } + } + + /** + * @brief Get the next BasicBlock depending on iteration order. + * @param had_change did the user of the iteration change the previous BasicBlock. + * @return the next BasicBlock following the iteration order, 0 if finished. + */ + virtual BasicBlock* Next(bool had_change = false) { + // Update changed: if had_changed is true, we remember it for the whole iteration. + changed_ |= had_change; + + return ForwardRepeatNext(); + } + }; + + } // namespace art #endif // ART_COMPILER_DEX_DATAFLOW_ITERATOR_H_ diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index 77b5057538..63e38317f9 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -21,7 +21,7 @@ #include "dataflow_iterator-inl.h" #include "leb128.h" #include "mirror/object.h" -#include "pass_driver.h" +#include "pass_driver_me.h" #include "runtime.h" #include "base/logging.h" #include "base/timing_logger.h" @@ -136,22 +136,22 @@ void CompilationUnit::EndTiming() { // TODO: Remove this when we are able to compile everything. int arm64_support_list[] = { Instruction::NOP, - // Instruction::MOVE, - // Instruction::MOVE_FROM16, - // Instruction::MOVE_16, - // Instruction::MOVE_WIDE, - // Instruction::MOVE_WIDE_FROM16, - // Instruction::MOVE_WIDE_16, - // Instruction::MOVE_OBJECT, - // Instruction::MOVE_OBJECT_FROM16, - // Instruction::MOVE_OBJECT_16, + Instruction::MOVE, + Instruction::MOVE_FROM16, + Instruction::MOVE_16, + Instruction::MOVE_WIDE, + Instruction::MOVE_WIDE_FROM16, + Instruction::MOVE_WIDE_16, + Instruction::MOVE_OBJECT, + Instruction::MOVE_OBJECT_FROM16, + Instruction::MOVE_OBJECT_16, // Instruction::MOVE_RESULT, // Instruction::MOVE_RESULT_WIDE, // Instruction::MOVE_RESULT_OBJECT, Instruction::MOVE_EXCEPTION, Instruction::RETURN_VOID, - // Instruction::RETURN, - // Instruction::RETURN_WIDE, + Instruction::RETURN, + Instruction::RETURN_WIDE, // Instruction::RETURN_OBJECT, // Instruction::CONST_4, // Instruction::CONST_16, @@ -184,7 +184,7 @@ int arm64_support_list[] = { // Instruction::CMPG_FLOAT, // Instruction::CMPL_DOUBLE, // Instruction::CMPG_DOUBLE, - // Instruction::CMP_LONG, + Instruction::CMP_LONG, // Instruction::IF_EQ, // Instruction::IF_NE, // Instruction::IF_LT, @@ -258,16 +258,16 @@ int arm64_support_list[] = { // Instruction::INVOKE_INTERFACE_RANGE, // Instruction::UNUSED_79, // Instruction::UNUSED_7A, - // Instruction::NEG_INT, - // Instruction::NOT_INT, - // Instruction::NEG_LONG, - // Instruction::NOT_LONG, + Instruction::NEG_INT, + Instruction::NOT_INT, + Instruction::NEG_LONG, + Instruction::NOT_LONG, // Instruction::NEG_FLOAT, // Instruction::NEG_DOUBLE, - // Instruction::INT_TO_LONG, + Instruction::INT_TO_LONG, // Instruction::INT_TO_FLOAT, // Instruction::INT_TO_DOUBLE, - // Instruction::LONG_TO_INT, + Instruction::LONG_TO_INT, // Instruction::LONG_TO_FLOAT, // Instruction::LONG_TO_DOUBLE, // Instruction::FLOAT_TO_INT, @@ -276,31 +276,31 @@ int arm64_support_list[] = { // Instruction::DOUBLE_TO_INT, // Instruction::DOUBLE_TO_LONG, // Instruction::DOUBLE_TO_FLOAT, - // 
Instruction::INT_TO_BYTE, - // Instruction::INT_TO_CHAR, - // Instruction::INT_TO_SHORT, - // Instruction::ADD_INT, - // Instruction::SUB_INT, - // Instruction::MUL_INT, - // Instruction::DIV_INT, - // Instruction::REM_INT, - // Instruction::AND_INT, - // Instruction::OR_INT, - // Instruction::XOR_INT, - // Instruction::SHL_INT, - // Instruction::SHR_INT, - // Instruction::USHR_INT, - // Instruction::ADD_LONG, - // Instruction::SUB_LONG, - // Instruction::MUL_LONG, - // Instruction::DIV_LONG, - // Instruction::REM_LONG, - // Instruction::AND_LONG, - // Instruction::OR_LONG, - // Instruction::XOR_LONG, - // Instruction::SHL_LONG, - // Instruction::SHR_LONG, - // Instruction::USHR_LONG, + Instruction::INT_TO_BYTE, + Instruction::INT_TO_CHAR, + Instruction::INT_TO_SHORT, + Instruction::ADD_INT, + Instruction::SUB_INT, + Instruction::MUL_INT, + Instruction::DIV_INT, + Instruction::REM_INT, + Instruction::AND_INT, + Instruction::OR_INT, + Instruction::XOR_INT, + Instruction::SHL_INT, + Instruction::SHR_INT, + Instruction::USHR_INT, + Instruction::ADD_LONG, + Instruction::SUB_LONG, + Instruction::MUL_LONG, + Instruction::DIV_LONG, + Instruction::REM_LONG, + Instruction::AND_LONG, + Instruction::OR_LONG, + Instruction::XOR_LONG, + Instruction::SHL_LONG, + Instruction::SHR_LONG, + Instruction::USHR_LONG, // Instruction::ADD_FLOAT, // Instruction::SUB_FLOAT, // Instruction::MUL_FLOAT, @@ -311,28 +311,28 @@ int arm64_support_list[] = { // Instruction::MUL_DOUBLE, // Instruction::DIV_DOUBLE, // Instruction::REM_DOUBLE, - // Instruction::ADD_INT_2ADDR, - // Instruction::SUB_INT_2ADDR, - // Instruction::MUL_INT_2ADDR, - // Instruction::DIV_INT_2ADDR, - // Instruction::REM_INT_2ADDR, - // Instruction::AND_INT_2ADDR, - // Instruction::OR_INT_2ADDR, - // Instruction::XOR_INT_2ADDR, - // Instruction::SHL_INT_2ADDR, - // Instruction::SHR_INT_2ADDR, - // Instruction::USHR_INT_2ADDR, - // Instruction::ADD_LONG_2ADDR, - // Instruction::SUB_LONG_2ADDR, - // Instruction::MUL_LONG_2ADDR, - // Instruction::DIV_LONG_2ADDR, - // Instruction::REM_LONG_2ADDR, - // Instruction::AND_LONG_2ADDR, - // Instruction::OR_LONG_2ADDR, - // Instruction::XOR_LONG_2ADDR, - // Instruction::SHL_LONG_2ADDR, - // Instruction::SHR_LONG_2ADDR, - // Instruction::USHR_LONG_2ADDR, + Instruction::ADD_INT_2ADDR, + Instruction::SUB_INT_2ADDR, + Instruction::MUL_INT_2ADDR, + Instruction::DIV_INT_2ADDR, + Instruction::REM_INT_2ADDR, + Instruction::AND_INT_2ADDR, + Instruction::OR_INT_2ADDR, + Instruction::XOR_INT_2ADDR, + Instruction::SHL_INT_2ADDR, + Instruction::SHR_INT_2ADDR, + Instruction::USHR_INT_2ADDR, + Instruction::ADD_LONG_2ADDR, + Instruction::SUB_LONG_2ADDR, + Instruction::MUL_LONG_2ADDR, + Instruction::DIV_LONG_2ADDR, + Instruction::REM_LONG_2ADDR, + Instruction::AND_LONG_2ADDR, + Instruction::OR_LONG_2ADDR, + Instruction::XOR_LONG_2ADDR, + Instruction::SHL_LONG_2ADDR, + Instruction::SHR_LONG_2ADDR, + Instruction::USHR_LONG_2ADDR, // Instruction::ADD_FLOAT_2ADDR, // Instruction::SUB_FLOAT_2ADDR, // Instruction::MUL_FLOAT_2ADDR, @@ -343,25 +343,25 @@ int arm64_support_list[] = { // Instruction::MUL_DOUBLE_2ADDR, // Instruction::DIV_DOUBLE_2ADDR, // Instruction::REM_DOUBLE_2ADDR, - // Instruction::ADD_INT_LIT16, - // Instruction::RSUB_INT, - // Instruction::MUL_INT_LIT16, - // Instruction::DIV_INT_LIT16, - // Instruction::REM_INT_LIT16, - // Instruction::AND_INT_LIT16, - // Instruction::OR_INT_LIT16, - // Instruction::XOR_INT_LIT16, + Instruction::ADD_INT_LIT16, + Instruction::RSUB_INT, + Instruction::MUL_INT_LIT16, + 
Instruction::DIV_INT_LIT16, + Instruction::REM_INT_LIT16, + Instruction::AND_INT_LIT16, + Instruction::OR_INT_LIT16, + Instruction::XOR_INT_LIT16, Instruction::ADD_INT_LIT8, - // Instruction::RSUB_INT_LIT8, - // Instruction::MUL_INT_LIT8, - // Instruction::DIV_INT_LIT8, - // Instruction::REM_INT_LIT8, - // Instruction::AND_INT_LIT8, - // Instruction::OR_INT_LIT8, - // Instruction::XOR_INT_LIT8, - // Instruction::SHL_INT_LIT8, - // Instruction::SHR_INT_LIT8, - // Instruction::USHR_INT_LIT8, + Instruction::RSUB_INT_LIT8, + Instruction::MUL_INT_LIT8, + Instruction::DIV_INT_LIT8, + Instruction::REM_INT_LIT8, + Instruction::AND_INT_LIT8, + Instruction::OR_INT_LIT8, + Instruction::XOR_INT_LIT8, + Instruction::SHL_INT_LIT8, + Instruction::SHR_INT_LIT8, + Instruction::USHR_INT_LIT8, // Instruction::IGET_QUICK, // Instruction::IGET_WIDE_QUICK, // Instruction::IGET_OBJECT_QUICK, @@ -403,7 +403,7 @@ int arm64_support_list[] = { // kMirOpNop, // kMirOpNullCheck, // kMirOpRangeCheck, - // kMirOpDivZeroCheck, + kMirOpDivZeroCheck, kMirOpCheck, // kMirOpCheckPart2, // kMirOpSelect, @@ -699,7 +699,7 @@ int x86_64_support_list[] = { // V : void // (ARM64) Current calling conversion only support 32bit softfp // which has problems with long, float, double -constexpr char arm64_supported_types[] = "ZBSCILV"; +constexpr char arm64_supported_types[] = "ZBSCILVJ"; // (x84_64) We still have troubles with compiling longs/doubles/floats constexpr char x86_64_supported_types[] = "ZBSCILV"; @@ -924,7 +924,7 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, } /* Create the pass driver and launch it */ - PassDriver pass_driver(&cu); + PassDriverME pass_driver(&cu); pass_driver.Launch(); if (cu.enable_debug & (1 << kDebugDumpCheckStats)) { diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc index 2b1c4207e8..e56e0160ca 100644 --- a/compiler/dex/local_value_numbering_test.cc +++ b/compiler/dex/local_value_numbering_test.cc @@ -144,7 +144,6 @@ class LocalValueNumberingTest : public testing::Test { mir->ssa_rep->fp_def = nullptr; // Not used by LVN. mir->dalvikInsn.opcode = def->opcode; mir->offset = i; // LVN uses offset only for debug output - mir->width = 1u; // Not used by LVN. 
mir->optimization_flags = 0u; if (i != 0u) { diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index ba4224ea78..c34a9f5638 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -17,6 +17,7 @@ #include "mir_graph.h" #include <inttypes.h> +#include <queue> #include "base/stl_util.h" #include "compiler_internals.h" @@ -76,6 +77,7 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) dfs_order_(NULL), dfs_post_order_(NULL), dom_post_order_traversal_(NULL), + topological_order_(nullptr), i_dom_list_(NULL), def_block_matrix_(NULL), temp_dalvik_register_v_(NULL), @@ -196,7 +198,7 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, } orig_block->last_mir_insn = prev; - prev->next = NULL; + prev->next = nullptr; /* * Update the immediate predecessor block pointer so that outgoing edges @@ -220,6 +222,7 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, while (p != bottom_block->last_mir_insn) { p = p->next; DCHECK(p != nullptr); + p->bb = bottom_block->id; int opcode = p->dalvikInsn.opcode; /* * Some messiness here to ensure that we only enter real opcodes and only the @@ -543,7 +546,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse new_block->start_offset = insn->offset; cur_block->fall_through = new_block->id; new_block->predecessors->Insert(cur_block->id); - MIR* new_insn = static_cast<MIR*>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR)); + MIR* new_insn = NewMIR(); *new_insn = *insn; insn->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheck); @@ -629,11 +632,10 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ /* Parse all instructions and put them into containing basic blocks */ while (code_ptr < code_end) { - MIR *insn = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR)); + MIR *insn = NewMIR(); insn->offset = current_offset_; insn->m_unit_index = current_method_; int width = ParseInsn(code_ptr, &insn->dalvikInsn); - insn->width = width; Instruction::Code opcode = insn->dalvikInsn.opcode; if (opcode_count_ != NULL) { opcode_count_[static_cast<int>(opcode)]++; @@ -924,7 +926,7 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff fclose(file); } -/* Insert an MIR instruction to the end of a basic block */ +/* Insert an MIR instruction to the end of a basic block. */ void BasicBlock::AppendMIR(MIR* mir) { if (first_mir_insn == nullptr) { DCHECK(last_mir_insn == nullptr); @@ -935,9 +937,11 @@ void BasicBlock::AppendMIR(MIR* mir) { mir->next = nullptr; last_mir_insn = mir; } + + mir->bb = id; } -/* Insert an MIR instruction to the head of a basic block */ +/* Insert an MIR instruction to the head of a basic block. */ void BasicBlock::PrependMIR(MIR* mir) { if (first_mir_insn == nullptr) { DCHECK(last_mir_insn == nullptr); @@ -947,17 +951,53 @@ void BasicBlock::PrependMIR(MIR* mir) { mir->next = first_mir_insn; first_mir_insn = mir; } + + mir->bb = id; } -/* Insert a MIR instruction after the specified MIR */ +/* Insert a MIR instruction after the specified MIR. */ void BasicBlock::InsertMIRAfter(MIR* current_mir, MIR* new_mir) { new_mir->next = current_mir->next; current_mir->next = new_mir; if (last_mir_insn == current_mir) { - /* Is the last MIR in the block */ + /* Is the last MIR in the block? 
*/ last_mir_insn = new_mir; } + + new_mir->bb = id; +} + +MIR* BasicBlock::FindPreviousMIR(MIR* mir) { + MIR* current = first_mir_insn; + + while (current != nullptr) { + MIR* next = current->next; + + if (next == mir) { + return current; + } + + current = next; + } + + return nullptr; +} + +void BasicBlock::InsertMIRBefore(MIR* current_mir, MIR* new_mir) { + if (first_mir_insn == current_mir) { + /* Is the first MIR in the block? */ + first_mir_insn = new_mir; + new_mir->bb = id; + } + + MIR* prev = FindPreviousMIR(current_mir); + + if (prev != nullptr) { + prev->next = new_mir; + new_mir->next = current_mir; + new_mir->bb = id; + } } MIR* BasicBlock::GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current) { @@ -1240,6 +1280,12 @@ CallInfo* MIRGraph::NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, return info; } +// Allocate a new MIR. +MIR* MIRGraph::NewMIR() { + MIR* mir = new (arena_) MIR(); + return mir; +} + // Allocate a new basic block. BasicBlock* MIRGraph::NewMemBB(BBType block_type, int block_id) { BasicBlock* bb = static_cast<BasicBlock*>(arena_->Alloc(sizeof(BasicBlock), @@ -1293,6 +1339,104 @@ void MIRGraph::InitializeSSATransformation() { DoDFSPreOrderSSARename(GetEntryBlock()); } +void MIRGraph::ComputeTopologicalSortOrder() { + std::queue<BasicBlock *> q; + std::map<int, int> visited_cnt_values; + + // Clear the nodes. + ClearAllVisitedFlags(); + + // Create the topological order if need be. + if (topological_order_ != nullptr) { + topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, 0); + } + topological_order_->Reset(); + + // Set up visitedCntValues map for all BB. The default value for this counters in the map is zero. + // also fill initial queue. + GrowableArray<BasicBlock*>::Iterator iterator(&block_list_); + + while (true) { + BasicBlock* bb = iterator.Next(); + + if (bb == nullptr) { + break; + } + + if (bb->hidden == true) { + continue; + } + + visited_cnt_values[bb->id] = bb->predecessors->Size(); + + GrowableArray<BasicBlockId>::Iterator pred_iterator(bb->predecessors); + // To process loops we should not wait for dominators. + while (true) { + BasicBlock* pred_bb = GetBasicBlock(pred_iterator.Next()); + + if (pred_bb == nullptr) { + break; + } + + if (pred_bb->dominators == nullptr || pred_bb->hidden == true) { + continue; + } + + // Skip the backward branch. + if (pred_bb->dominators->IsBitSet(bb->id) != 0) { + visited_cnt_values[bb->id]--; + } + } + + // Add entry block to queue. + if (visited_cnt_values[bb->id] == 0) { + q.push(bb); + } + } + + while (q.size() > 0) { + // Get top. + BasicBlock *bb = q.front(); + q.pop(); + + DCHECK_EQ(bb->hidden, false); + + if (bb->IsExceptionBlock() == true) { + continue; + } + + // We've visited all the predecessors. So, we can visit bb. + if (bb->visited == false) { + bb->visited = true; + + // Now add the basic block. + topological_order_->Insert(bb->id); + + // Reduce visitedCnt for all the successors and add into the queue ones with visitedCnt equals to zero. + ChildBlockIterator succIter(bb, this); + BasicBlock *successor = succIter.Next(); + while (successor != nullptr) { + // one more predecessor was visited. + visited_cnt_values[successor->id]--; + + if (visited_cnt_values[successor->id] <= 0 && successor->visited == false && successor->hidden == false) { + q.push(successor); + } + + // Take next successor. 
+ successor = succIter.Next(); + } + } + } +} + +bool BasicBlock::IsExceptionBlock() const { + if (block_type == kExceptionHandling) { + return true; + } + return false; +} + ChildBlockIterator::ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph) : basic_block_(bb), mir_graph_(mir_graph), visited_fallthrough_(false), visited_taken_(false), have_successors_(false) { @@ -1344,4 +1488,106 @@ BasicBlock* ChildBlockIterator::Next() { return nullptr; } +bool BasicBlock::RemoveMIR(MIR* mir) { + if (mir == nullptr) { + return false; + } + + // Find the MIR, and the one before it if they exist. + MIR* current = nullptr; + MIR* prev = nullptr; + + // Find the mir we are looking for. + for (current = first_mir_insn; current != nullptr; prev = current, current = current->next) { + if (current == mir) { + break; + } + } + + // Did we find it? + if (current != nullptr) { + MIR* next = current->next; + + // Just update the links of prev and next and current is almost gone. + if (prev != nullptr) { + prev->next = next; + } + + // Exceptions are if first or last mirs are invoke. + if (first_mir_insn == current) { + first_mir_insn = next; + } + + if (last_mir_insn == current) { + last_mir_insn = prev; + } + + // Found it and removed it. + return true; + } + + // We did not find it. + return false; +} + +MIR* MIR::Copy(MIRGraph* mir_graph) { + MIR* res = mir_graph->NewMIR(); + *res = *this; + + // Remove links + res->next = nullptr; + res->bb = NullBasicBlockId; + res->ssa_rep = nullptr; + + return res; +} + +MIR* MIR::Copy(CompilationUnit* c_unit) { + return Copy(c_unit->mir_graph.get()); +} + +uint32_t SSARepresentation::GetStartUseIndex(Instruction::Code opcode) { + // Default result. + int res = 0; + + // We are basically setting the iputs to their igets counterparts. + switch (opcode) { + case Instruction::IPUT: + case Instruction::IPUT_OBJECT: + case Instruction::IPUT_BOOLEAN: + case Instruction::IPUT_BYTE: + case Instruction::IPUT_CHAR: + case Instruction::IPUT_SHORT: + case Instruction::IPUT_QUICK: + case Instruction::IPUT_OBJECT_QUICK: + case Instruction::APUT: + case Instruction::APUT_OBJECT: + case Instruction::APUT_BOOLEAN: + case Instruction::APUT_BYTE: + case Instruction::APUT_CHAR: + case Instruction::APUT_SHORT: + case Instruction::SPUT: + case Instruction::SPUT_OBJECT: + case Instruction::SPUT_BOOLEAN: + case Instruction::SPUT_BYTE: + case Instruction::SPUT_CHAR: + case Instruction::SPUT_SHORT: + // Skip the VR containing what to store. + res = 1; + break; + case Instruction::IPUT_WIDE: + case Instruction::IPUT_WIDE_QUICK: + case Instruction::APUT_WIDE: + case Instruction::SPUT_WIDE: + // Skip the two VRs containing what to store. + res = 2; + break; + default: + // Do nothing in the general case. + break; + } + + return res; +} + } // namespace art diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 11d2fbe039..3a00a434b8 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -242,6 +242,8 @@ struct SSARepresentation { bool* fp_use; int32_t* defs; bool* fp_def; + + static uint32_t GetStartUseIndex(Instruction::Code opcode); }; /* @@ -261,12 +263,15 @@ struct MIR { uint32_t vC; uint32_t arg[5]; /* vC/D/E/F/G in invoke or filled-new-array */ Instruction::Code opcode; + + explicit DecodedInstruction():vA(0), vB(0), vB_wide(0), vC(0), opcode(Instruction::NOP) { + } } dalvikInsn; - uint16_t width; // Note: width can include switch table or fill array data. NarrowDexOffset offset; // Offset of the instruction in code units. 
uint16_t optimization_flags; int16_t m_unit_index; // From which method was this MIR included + BasicBlockId bb; MIR* next; SSARepresentation* ssa_rep; union { @@ -285,6 +290,23 @@ struct MIR { // INVOKE data index, points to MIRGraph::method_lowering_infos_. uint32_t method_lowering_info; } meta; + + explicit MIR():offset(0), optimization_flags(0), m_unit_index(0), bb(NullBasicBlockId), + next(nullptr), ssa_rep(nullptr) { + memset(&meta, 0, sizeof(meta)); + } + + uint32_t GetStartUseIndex() const { + return SSARepresentation::GetStartUseIndex(dalvikInsn.opcode); + } + + MIR* Copy(CompilationUnit *c_unit); + MIR* Copy(MIRGraph* mir_Graph); + + static void* operator new(size_t size, ArenaAllocator* arena) { + return arena->Alloc(sizeof(MIR), kArenaAllocMIR); + } + static void operator delete(void* p) {} // Nop. }; struct SuccessorBlockInfo; @@ -319,6 +341,8 @@ struct BasicBlock { void AppendMIR(MIR* mir); void PrependMIR(MIR* mir); void InsertMIRAfter(MIR* current_mir, MIR* new_mir); + void InsertMIRBefore(MIR* current_mir, MIR* new_mir); + MIR* FindPreviousMIR(MIR* mir); /** * @brief Used to obtain the next MIR that follows unconditionally. @@ -329,11 +353,13 @@ struct BasicBlock { * @return Returns the following MIR if one can be found. */ MIR* GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current); + bool RemoveMIR(MIR* mir); + bool IsExceptionBlock() const; }; /* * The "blocks" field in "successor_block_list" points to an array of elements with the type - * "SuccessorBlockInfo". For catch blocks, key is type index for the exception. For swtich + * "SuccessorBlockInfo". For catch blocks, key is type index for the exception. For switch * blocks, key is the case value. */ struct SuccessorBlockInfo { @@ -573,6 +599,10 @@ class MIRGraph { void BasicBlockOptimization(); + GrowableArray<BasicBlockId>* GetTopologicalSortOrder() { + return topological_order_; + } + bool IsConst(int32_t s_reg) const { return is_constant_v_->IsBitSet(s_reg); } @@ -836,9 +866,11 @@ class MIRGraph { void DumpMIRGraph(); CallInfo* NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, bool is_range); BasicBlock* NewMemBB(BBType block_type, int block_id); + MIR* NewMIR(); MIR* AdvanceMIR(BasicBlock** p_bb, MIR* mir); BasicBlock* NextDominatedBlock(BasicBlock* bb); bool LayoutBlocks(BasicBlock* bb); + void ComputeTopologicalSortOrder(); bool InlineCallsGate(); void InlineCallsStart(); @@ -977,6 +1009,7 @@ class MIRGraph { GrowableArray<BasicBlockId>* dfs_order_; GrowableArray<BasicBlockId>* dfs_post_order_; GrowableArray<BasicBlockId>* dom_post_order_traversal_; + GrowableArray<BasicBlockId>* topological_order_; int* i_dom_list_; ArenaBitVector** def_block_matrix_; // num_dalvik_register x num_blocks. ArenaBitVector* temp_dalvik_register_v_; diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 749a2356eb..1d4aef2183 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -268,13 +268,22 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) DCHECK_EQ(ct_type, kCompilerTempVR); // The new non-special compiler temp must receive a unique v_reg with a negative value. 
- compiler_temp->v_reg = static_cast<int>(kVRegNonSpecialTempBaseReg) - num_non_special_compiler_temps_; + compiler_temp->v_reg = static_cast<int>(kVRegNonSpecialTempBaseReg) - + num_non_special_compiler_temps_; compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg); num_non_special_compiler_temps_++; if (wide) { - // Ensure that the two registers are consecutive. Since the virtual registers used for temps grow in a - // negative fashion, we need the smaller to refer to the low part. Thus, we redefine the v_reg and s_reg_low. + // Create a new CompilerTemp for the high part. + CompilerTemp *compiler_temp_high = + static_cast<CompilerTemp *>(arena_->Alloc(sizeof(CompilerTemp), kArenaAllocRegAlloc)); + compiler_temp_high->v_reg = compiler_temp->v_reg; + compiler_temp_high->s_reg_low = compiler_temp->s_reg_low; + compiler_temps_.Insert(compiler_temp_high); + + // Ensure that the two registers are consecutive. Since the virtual registers used for temps + // grow in a negative fashion, we need the smaller to refer to the low part. Thus, we + // redefine the v_reg and s_reg_low. compiler_temp->v_reg--; int ssa_reg_high = compiler_temp->s_reg_low; compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg); diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index 891d9fb7ea..86092b6e3d 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -170,7 +170,6 @@ class ClassInitCheckEliminationTest : public testing::Test { } mir->ssa_rep = nullptr; mir->offset = 2 * i; // All insns need to be at least 2 code units long. - mir->width = 2u; mir->optimization_flags = 0u; merged_df_flags |= MIRGraph::GetDataFlowAttributes(def->opcode); } diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h index 9457d5be76..4ce040e9ab 100644 --- a/compiler/dex/pass.h +++ b/compiler/dex/pass.h @@ -19,6 +19,7 @@ #include <string> +#include "base/macros.h" namespace art { // Forward declarations. @@ -26,42 +27,18 @@ struct BasicBlock; struct CompilationUnit; class Pass; -/** - * @brief OptimizationFlag is an enumeration to perform certain tasks for a given pass. - * @details Each enum should be a power of 2 to be correctly used. - */ -enum OptimizationFlag { -}; - -enum DataFlowAnalysisMode { - kAllNodes = 0, /**< @brief All nodes. */ - kPreOrderDFSTraversal, /**< @brief Depth-First-Search / Pre-Order. */ - kRepeatingPreOrderDFSTraversal, /**< @brief Depth-First-Search / Repeating Pre-Order. */ - kReversePostOrderDFSTraversal, /**< @brief Depth-First-Search / Reverse Post-Order. */ - kRepeatingPostOrderDFSTraversal, /**< @brief Depth-First-Search / Repeating Post-Order. */ - kRepeatingReversePostOrderDFSTraversal, /**< @brief Depth-First-Search / Repeating Reverse Post-Order. */ - kPostOrderDOMTraversal, /**< @brief Dominator tree / Post-Order. */ - kNoNodes, /**< @brief Skip BasicBlock traversal. */ +// Empty Pass Data Class, can be extended by any pass extending the base Pass class. +class PassDataHolder { }; /** * @class Pass - * @brief Pass is the Pass structure for the optimizations. - * @details The following structure has the different optimization passes that we are going to do. + * @brief Base Pass class, can be extended to perform a more defined way of doing the work call. 
*/ class Pass { public: - explicit Pass(const char* name, DataFlowAnalysisMode type = kAllNodes, - unsigned int flags = 0u, const char* dump = "") - : pass_name_(name), traversal_type_(type), flags_(flags), dump_cfg_folder_(dump) { - } - - Pass(const char* name, DataFlowAnalysisMode type, const char* dump) - : pass_name_(name), traversal_type_(type), flags_(0), dump_cfg_folder_(dump) { - } - - Pass(const char* name, const char* dump) - : pass_name_(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) { + explicit Pass(const char* name) + : pass_name_(name) { } virtual ~Pass() { @@ -71,59 +48,42 @@ class Pass { return pass_name_; } - virtual DataFlowAnalysisMode GetTraversal() const { - return traversal_type_; - } - - virtual bool GetFlag(OptimizationFlag flag) const { - return (flags_ & flag); - } - - const char* GetDumpCFGFolder() const { - return dump_cfg_folder_; - } - /** * @brief Gate for the pass: determines whether to execute the pass or not considering a CompilationUnit - * @param c_unit the CompilationUnit. - * @return whether or not to execute the pass + * @param data the PassDataHolder. + * @return whether or not to execute the pass. */ - virtual bool Gate(const CompilationUnit* c_unit) const { + virtual bool Gate(const PassDataHolder* data) const { // Unused parameter. - UNUSED(c_unit); + UNUSED(data); // Base class says yes. return true; } /** - * @brief Start of the pass: called before the WalkBasicBlocks function - * @param c_unit the considered CompilationUnit. + * @brief Start of the pass: called before the Worker function. */ - virtual void Start(CompilationUnit* c_unit) const { + virtual void Start(const PassDataHolder* data) const { // Unused parameter. - UNUSED(c_unit); + UNUSED(data); } /** - * @brief End of the pass: called after the WalkBasicBlocks function - * @param c_unit the considered CompilationUnit. + * @brief End of the pass: called after the WalkBasicBlocks function. */ - virtual void End(CompilationUnit* c_unit) const { + virtual void End(const PassDataHolder* data) const { // Unused parameter. - UNUSED(c_unit); + UNUSED(data); } /** - * @brief Actually walk the BasicBlocks following a particular traversal type. - * @param c_unit the CompilationUnit. - * @param bb the BasicBlock. + * @param data the object containing data necessary for the pass. * @return whether or not there is a change when walking the BasicBlock */ - virtual bool WalkBasicBlocks(CompilationUnit* c_unit, BasicBlock* bb) const { - // Unused parameters. - UNUSED(c_unit); - UNUSED(bb); + virtual bool Worker(const PassDataHolder* data) const { + // Unused parameter. + UNUSED(data); // BasicBlock did not change. return false; @@ -133,15 +93,6 @@ class Pass { /** @brief The pass name: used for searching for a pass when running a particular pass or debugging. */ const char* const pass_name_; - /** @brief Type of traversal: determines the order to execute the pass on the BasicBlocks. */ - const DataFlowAnalysisMode traversal_type_; - - /** @brief Flags for additional directives: used to determine if a particular clean-up is necessary post pass. */ - const unsigned int flags_; - - /** @brief CFG Dump Folder: what sub-folder to use for dumping the CFGs post pass. */ - const char* const dump_cfg_folder_; - private: // In order to make the all passes not copy-friendly. 
DISALLOW_COPY_AND_ASSIGN(Pass); diff --git a/compiler/dex/pass_driver.cc b/compiler/dex/pass_driver.cc index 999ed2af5c..ca936cd41a 100644 --- a/compiler/dex/pass_driver.cc +++ b/compiler/dex/pass_driver.cc @@ -162,6 +162,12 @@ void PassDriver::DispatchPass(CompilationUnit* c_unit, const Pass* curPass) { case kAllNodes: DoWalkBasicBlocks<AllNodesIterator>(c_unit, curPass); break; + case kTopologicalSortTraversal: + DoWalkBasicBlocks<TopologicalSortIterator>(c_unit, curPass); + break; + case kRepeatingTopologicalSortTraversal: + DoWalkBasicBlocks<RepeatingTopologicalSortIterator>(c_unit, curPass); + break; case kNoNodes: break; default: diff --git a/compiler/dex/pass_driver.h b/compiler/dex/pass_driver.h index 2b7196e187..aa0d1ae462 100644 --- a/compiler/dex/pass_driver.h +++ b/compiler/dex/pass_driver.h @@ -22,77 +22,169 @@ #include "safe_map.h" // Forward Declarations. -class CompilationUnit; class Pass; - +class PassDriver; namespace art { +/** + * @brief Helper function to create a single instance of a given Pass and can be shared across + * the threads. + */ +template <typename PassType> +const Pass* GetPassInstance() { + static const PassType pass; + return &pass; +} + +// Empty holder for the constructor. +class PassDriverDataHolder { +}; /** * @class PassDriver - * @brief PassDriver is the wrapper around all Pass instances in order to execute them from the Middle-End + * @brief PassDriver is the wrapper around all Pass instances in order to execute them */ +template <typename PassDriverType> class PassDriver { public: - explicit PassDriver(CompilationUnit* cu, bool create_default_passes = true); + explicit PassDriver() { + InitializePasses(); + } - ~PassDriver(); + virtual ~PassDriver() { + } /** * @brief Insert a Pass: can warn if multiple passes have the same name. - * @param new_pass the new Pass to insert in the map and list. - * @param warn_override warn if the name of the Pass is already used. */ - void InsertPass(const Pass* new_pass); + void InsertPass(const Pass* new_pass) { + DCHECK(new_pass != nullptr); + DCHECK(new_pass->GetName() != nullptr && new_pass->GetName()[0] != 0); + + // It is an error to override an existing pass. + DCHECK(GetPass(new_pass->GetName()) == nullptr) + << "Pass name " << new_pass->GetName() << " already used."; + + // Now add to the list. + pass_list_.push_back(new_pass); + } /** * @brief Run a pass using the name as key. - * @param c_unit the considered CompilationUnit. - * @param pass_name the Pass name. * @return whether the pass was applied. */ - bool RunPass(CompilationUnit* c_unit, const char* pass_name); + virtual bool RunPass(const char* pass_name) { + // Paranoid: c_unit cannot be nullptr and we need a pass name. + DCHECK(pass_name != nullptr && pass_name[0] != 0); + + const Pass* cur_pass = GetPass(pass_name); + + if (cur_pass != nullptr) { + return RunPass(cur_pass); + } + + // Return false, we did not find the pass. + return false; + } /** - * @brief Run a pass using the Pass itself. - * @param time_split do we want a time split request(default: false)? - * @return whether the pass was applied. + * @brief Runs all the passes with the pass_list_. */ - bool RunPass(CompilationUnit* c_unit, const Pass* pass, bool time_split = false); + void Launch() { + for (const Pass* cur_pass : pass_list_) { + RunPass(cur_pass); + } + } - void Launch(); + /** + * @brief Searches for a particular pass. + * @param the name of the pass to be searched for. 
+ */ + const Pass* GetPass(const char* name) const { + for (const Pass* cur_pass : pass_list_) { + if (strcmp(name, cur_pass->GetName()) == 0) { + return cur_pass; + } + } + return nullptr; + } - void HandlePassFlag(CompilationUnit* c_unit, const Pass* pass); + static void CreateDefaultPassList(const std::string& disable_passes) { + // Insert each pass from g_passes into g_default_pass_list. + PassDriverType::g_default_pass_list.clear(); + PassDriverType::g_default_pass_list.reserve(PassDriver<PassDriverType>::g_passes_size); + for (uint16_t i = 0; i < PassDriver<PassDriverType>::g_passes_size; ++i) { + const Pass* pass = PassDriver<PassDriverType>::g_passes[i]; + // Check if we should disable this pass. + if (disable_passes.find(pass->GetName()) != std::string::npos) { + LOG(INFO) << "Skipping " << pass->GetName(); + } else { + PassDriverType::g_default_pass_list.push_back(pass); + } + } + } /** - * @brief Apply a patch: perform start/work/end functions. + * @brief Run a pass using the Pass itself. + * @param time_split do we want a time split request (default: false)? + * @return whether the pass was applied. */ - void ApplyPass(CompilationUnit* c_unit, const Pass* pass); + virtual bool RunPass(const Pass* pass, bool time_split = false) = 0; /** - * @brief Dispatch a patch: walk the BasicBlocks depending on the traversal mode + * @brief Print the pass names of all the passes available. */ - void DispatchPass(CompilationUnit* c_unit, const Pass* pass); + static void PrintPassNames() { + LOG(INFO) << "Loop Passes are:"; - static void PrintPassNames(); - static void CreateDefaultPassList(const std::string& disable_passes); + for (const Pass* cur_pass : PassDriver<PassDriverType>::g_default_pass_list) { + LOG(INFO) << "\t-" << cur_pass->GetName(); + } + } - const Pass* GetPass(const char* name) const; + protected: + /** + * @brief Gets the list of passes currently scheduled to execute. + * @return pass_list_ + */ + std::vector<const Pass*>& GetPasses() { + return pass_list_; + } - const char* GetDumpCFGFolder() const { - return dump_cfg_folder_; + virtual void InitializePasses() { + SetDefaultPasses(); } - protected: - void CreatePasses(); + void SetDefaultPasses() { + pass_list_ = PassDriver<PassDriverType>::g_default_pass_list; + } + + /** + * @brief Apply a pass: perform start/work/end functions. + */ + virtual void ApplyPass(PassDataHolder* data, const Pass* pass) { + pass->Start(data); + DispatchPass(pass); + pass->End(data); + } + /** + * @brief Dispatch a pass. + * Gives the ability to add logic when running the pass. + */ + virtual void DispatchPass(const Pass* pass) { + UNUSED(pass); + } /** @brief List of passes: provides the order to execute the passes. */ std::vector<const Pass*> pass_list_; - /** @brief The CompilationUnit on which to execute the passes on. */ - CompilationUnit* const cu_; + /** @brief The number of passes within g_passes. */ + static const uint16_t g_passes_size; + + /** @brief The static array of passes (g_passes). */ + static const Pass* const g_passes[]; - /** @brief Dump CFG base folder: where is the base folder for dumping CFGs. */ - const char* dump_cfg_folder_; + /** @brief The default pass list is used to initialize pass_list_.
*/ + static std::vector<const Pass*> g_default_pass_list; }; } // namespace art diff --git a/compiler/dex/pass_driver_me.cc b/compiler/dex/pass_driver_me.cc new file mode 100644 index 0000000000..d0545004f7 --- /dev/null +++ b/compiler/dex/pass_driver_me.cc @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "base/macros.h" +#include "bb_optimizations.h" +#include "compiler_internals.h" +#include "dataflow_iterator.h" +#include "dataflow_iterator-inl.h" +#include "pass_driver_me.h" + +namespace art { + +namespace { // anonymous namespace + +void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass, DataflowIterator* iterator) { + // Paranoid: Check the iterator before walking the BasicBlocks. + DCHECK(iterator != nullptr); + bool change = false; + for (BasicBlock *bb = iterator->Next(change); bb != 0; bb = iterator->Next(change)) { + data->bb = bb; + change = pass->Worker(data); + } +} + +template <typename Iterator> +inline void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass) { + DCHECK(data != nullptr); + CompilationUnit* c_unit = data->c_unit; + DCHECK(c_unit != nullptr); + Iterator iterator(c_unit->mir_graph.get()); + DoWalkBasicBlocks(data, pass, &iterator); +} +} // anonymous namespace + +/* + * Create the pass list. These passes are immutable and are shared across threads. + * + * Advantage is that there will be no race conditions here. + * Disadvantage is that the passes can't change their internal states depending on CompilationUnit: + * - This is not yet an issue: no current pass would require it. + */ +// The initial list of passes to be used by the PassDriverME. +template<> +const Pass* const PassDriver<PassDriverME>::g_passes[] = { + GetPassInstance<CacheFieldLoweringInfo>(), + GetPassInstance<CacheMethodLoweringInfo>(), + GetPassInstance<CallInlining>(), + GetPassInstance<CodeLayout>(), + GetPassInstance<SSATransformation>(), + GetPassInstance<ConstantPropagation>(), + GetPassInstance<InitRegLocations>(), + GetPassInstance<MethodUseCount>(), + GetPassInstance<NullCheckEliminationAndTypeInference>(), + GetPassInstance<ClassInitCheckElimination>(), + GetPassInstance<BBCombine>(), + GetPassInstance<BBOptimizations>(), +};

+// The number of the passes in the initial list of Passes (g_passes). +template<> +uint16_t const PassDriver<PassDriverME>::g_passes_size = arraysize(PassDriver<PassDriverME>::g_passes);

+// The default pass list is used by the PassDriverME instance of PassDriver to initialize pass_list_ (see the sketch below, before its definition).
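GetPassInstance<> above is a function-local-static singleton: the first call constructs a single const instance and every later call returns the same pointer, which is why the g_passes table can be built once and shared safely by all compilation threads. A minimal sketch of a pass written against the new interface, using a hypothetical pass name that is not part of this change:

    // Sketch only: assumes the Pass/PassME/PassMEDataHolder declarations above.
    class CountBlocksPass : public PassME {
     public:
      CountBlocksPass() : PassME("CountBlocks") {}

      bool Worker(const PassDataHolder* data) const {
        // The ME driver stores the current BasicBlock in the holder before each call.
        const PassMEDataHolder* me_data = down_cast<const PassMEDataHolder*>(data);
        UNUSED(me_data);  // A real pass would inspect or rewrite me_data->bb here.
        return false;     // Report that the BasicBlock did not change.
      }
    };

    // Every call yields the same shared, immutable instance:
    DCHECK_EQ(GetPassInstance<CountBlocksPass>(), GetPassInstance<CountBlocksPass>());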
+template<> +std::vector<const Pass*> PassDriver<PassDriverME>::g_default_pass_list(PassDriver<PassDriverME>::g_passes, PassDriver<PassDriverME>::g_passes + PassDriver<PassDriverME>::g_passes_size);

+PassDriverME::PassDriverME(CompilationUnit* cu) + : PassDriver(), pass_me_data_holder_(), dump_cfg_folder_("/sdcard/") { + pass_me_data_holder_.bb = nullptr; + pass_me_data_holder_.c_unit = cu; +}

+PassDriverME::~PassDriverME() { +}

+void PassDriverME::DispatchPass(const Pass* pass) { + VLOG(compiler) << "Dispatching " << pass->GetName(); + const PassME* me_pass = down_cast<const PassME*>(pass);

+ DataFlowAnalysisMode mode = me_pass->GetTraversal();

+ switch (mode) { + case kPreOrderDFSTraversal: + DoWalkBasicBlocks<PreOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kRepeatingPreOrderDFSTraversal: + DoWalkBasicBlocks<RepeatingPreOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kRepeatingPostOrderDFSTraversal: + DoWalkBasicBlocks<RepeatingPostOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kReversePostOrderDFSTraversal: + DoWalkBasicBlocks<ReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kRepeatingReversePostOrderDFSTraversal: + DoWalkBasicBlocks<RepeatingReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kPostOrderDOMTraversal: + DoWalkBasicBlocks<PostOrderDOMIterator>(&pass_me_data_holder_, me_pass); + break; + case kAllNodes: + DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass); + break; + case kNoNodes: + break; + default: + LOG(FATAL) << "Iterator mode not handled in dispatcher: " << mode; + break; + } +}

+bool PassDriverME::RunPass(const Pass* pass, bool time_split) { + // Paranoid: c_unit and pass cannot be nullptr, and the pass should have a name. + DCHECK(pass != nullptr); + DCHECK(pass->GetName() != nullptr && pass->GetName()[0] != 0); + CompilationUnit* c_unit = pass_me_data_holder_.c_unit; + DCHECK(c_unit != nullptr);

+ // Do we perform a time split? + if (time_split) { + c_unit->NewTimingSplit(pass->GetName()); + }

+ // Check the pass gate first. + bool should_apply_pass = pass->Gate(&pass_me_data_holder_); + if (should_apply_pass) { + // Applying the pass: first start, doWork, and end calls. + ApplyPass(&pass_me_data_holder_, pass);

+ // Do we want to log it? + if ((c_unit->enable_debug & (1 << kDebugDumpCFG)) != 0) { + // Do we have a pass folder? + const PassME* me_pass = (down_cast<const PassME*>(pass)); + const char* passFolder = me_pass->GetDumpCFGFolder(); + DCHECK(passFolder != nullptr);

+ if (passFolder[0] != 0) { + // Create directory prefix. + std::string prefix = GetDumpCFGFolder(); + prefix += passFolder; + prefix += "/"; + + c_unit->mir_graph->DumpCFG(prefix.c_str(), false); + } + } + }

+ // If the pass gate passed, we can declare success. + return should_apply_pass; +}

+const char* PassDriverME::GetDumpCFGFolder() const { + return dump_cfg_folder_; +}

+} // namespace art diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h new file mode 100644 index 0000000000..0142934be2 --- /dev/null +++ b/compiler/dex/pass_driver_me.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_PASS_DRIVER_ME_H_ +#define ART_COMPILER_DEX_PASS_DRIVER_ME_H_ + +#include "bb_optimizations.h" +#include "pass_driver.h" +#include "pass_me.h" + +namespace art { + +class PassDriverME: public PassDriver<PassDriverME> { + public: + explicit PassDriverME(CompilationUnit* cu); + ~PassDriverME(); + /** + * @brief Dispatch a pass: walk the BasicBlocks depending on the traversal mode. + */ + void DispatchPass(const Pass* pass); + bool RunPass(const Pass* pass, bool time_split = false); + const char* GetDumpCFGFolder() const; + protected: + /** @brief The data holder that contains data needed for the PassDriverME. */ + PassMEDataHolder pass_me_data_holder_; + + /** @brief Dump CFG base folder: where is the base folder for dumping CFGs. */ + const char* dump_cfg_folder_; +}; + +} // namespace art +#endif // ART_COMPILER_DEX_PASS_DRIVER_ME_H_ diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h new file mode 100644 index 0000000000..069fb45dc4 --- /dev/null +++ b/compiler/dex/pass_me.h @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_PASS_ME_H_ +#define ART_COMPILER_DEX_PASS_ME_H_ + +#include <string> +#include "pass.h" + +namespace art { + +// Forward declarations. +struct BasicBlock; +struct CompilationUnit; +class Pass; + +/** + * @brief OptimizationFlag is an enumeration to perform certain tasks for a given pass. + * @details Each enum should be a power of 2 to be correctly used. + */ +enum OptimizationFlag { +}; + +// Data holder class. +class PassMEDataHolder: public PassDataHolder { + public: + CompilationUnit* c_unit; + BasicBlock* bb; +}; + +enum DataFlowAnalysisMode { + kAllNodes = 0, /**< @brief All nodes. */ + kPreOrderDFSTraversal, /**< @brief Depth-First-Search / Pre-Order. */ + kRepeatingPreOrderDFSTraversal, /**< @brief Depth-First-Search / Repeating Pre-Order. */ + kReversePostOrderDFSTraversal, /**< @brief Depth-First-Search / Reverse Post-Order. */ + kRepeatingPostOrderDFSTraversal, /**< @brief Depth-First-Search / Repeating Post-Order. */ + kRepeatingReversePostOrderDFSTraversal, /**< @brief Depth-First-Search / Repeating Reverse Post-Order. */ + kPostOrderDOMTraversal, /**< @brief Dominator tree / Post-Order. */ + kTopologicalSortTraversal, /**< @brief Topological Order traversal. */ + kRepeatingTopologicalSortTraversal, /**< @brief Repeating Topological Order traversal. */ + kNoNodes, /**< @brief Skip BasicBlock traversal.
*/ +}; + +/** + * @class PassME + * @brief PassME is the Pass structure for the middle-end optimizations. + * @details It adds the traversal mode, optimization flags, and CFG dump folder used by the middle-end passes. + */ +class PassME: public Pass { + public: + explicit PassME(const char* name, DataFlowAnalysisMode type = kAllNodes, + unsigned int flags = 0u, const char* dump = "") + : Pass(name), traversal_type_(type), flags_(flags), dump_cfg_folder_(dump) { + } + + PassME(const char* name, DataFlowAnalysisMode type, const char* dump) + : Pass(name), traversal_type_(type), flags_(0), dump_cfg_folder_(dump) { + } + + PassME(const char* name, const char* dump) + : Pass(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) { + } + + ~PassME() { + } + + virtual DataFlowAnalysisMode GetTraversal() const { + return traversal_type_; + } + + const char* GetDumpCFGFolder() const { + return dump_cfg_folder_; + } + + bool GetFlag(OptimizationFlag flag) const { + return (flags_ & flag); + } + + protected: + /** @brief Type of traversal: determines the order to execute the pass on the BasicBlocks. */ + const DataFlowAnalysisMode traversal_type_; + + /** @brief Flags for additional directives: used to determine if a particular clean-up is necessary post pass. */ + const unsigned int flags_; + + /** @brief CFG Dump Folder: what sub-folder to use for dumping the CFGs post pass. */ + const char* const dump_cfg_folder_; +}; } // namespace art +#endif // ART_COMPILER_DEX_PASS_ME_H_ diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 2d1c19e2d2..f0a9ca4e82 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -120,6 +120,7 @@ class ArmMir2Lir FINAL : public Mir2Lir { bool GenInlinedSqrt(CallInfo* info); bool GenInlinedPeek(CallInfo* info, OpSize size); bool GenInlinedPoke(CallInfo* info, OpSize size); + void GenNotLong(RegLocation rl_dest, RegLocation rl_src); void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -127,6 +128,8 @@ class ArmMir2Lir FINAL : public Mir2Lir { RegLocation rl_src2); void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2, bool is_div); RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div); RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div); void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 384a008668..2556788bed 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -998,6 +998,15 @@ bool ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { #endif } +void ArmMir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) { + LOG(FATAL) << "Unexpected use of GenNotLong()"; +} + +void ArmMir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2, bool is_div) { + LOG(FATAL) << "Unexpected use of GenDivRemLong()"; +} + void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { rl_src = LoadValueWide(rl_src, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index
c3b23fdf60..6a6b0f6a53 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -298,6 +298,7 @@ enum ArmOpcode { kA64Mov2rr, // mov [00101010000] rm[20-16] [000000] [11111] rd[4-0]. kA64Mvn2rr, // mov [00101010001] rm[20-16] [000000] [11111] rd[4-0]. kA64Mul3rrr, // mul [00011011000] rm[20-16] [011111] rn[9-5] rd[4-0]. + kA64Msub4rrrr, // msub[s0011011000] rm[20-16] [1] ra[14-10] rn[9-5] rd[4-0]. kA64Neg3rro, // neg alias of "sub arg0, rzr, arg1, arg2". kA64Orr3Rrl, // orr [s01100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0]. kA64Orr4rrro, // orr [s0101010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0]. diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index 656f8fd6a4..4a0c055f4d 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -422,6 +422,10 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, "mul", "!0r, !1r, !2r", kFixupNone), + ENCODING_MAP(WIDE(kA64Msub4rrrr), SF_VARIANTS(0x1b008000), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 14, 10, + kFmtRegR, 20, 16, IS_QUAD_OP | REG_DEF0_USE123, + "msub", "!0r, !1r, !3r, !2r", kFixupNone), ENCODING_MAP(WIDE(kA64Neg3rro), SF_VARIANTS(0x4b0003e0), kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtShift, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index f7a01993d3..2e3ef86b9c 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -94,8 +94,7 @@ void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, tab_rec->anchor = switch_label; // Add displacement to base branch address and go! - OpRegRegRegShift(kOpAdd, r_base.GetReg(), r_base.GetReg(), r_disp.GetReg(), - ENCODE_NO_SHIFT, true); + OpRegRegRegShift(kOpAdd, r_base, r_base, r_disp, ENCODE_NO_SHIFT); NewLIR1(kA64Br1x, r_base.GetReg()); // Loop exit label. @@ -148,8 +147,7 @@ void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, tab_rec->anchor = switch_label; // Add displacement to base branch address and go! - OpRegRegRegShift(kOpAdd, branch_reg.GetReg(), branch_reg.GetReg(), disp_reg.GetReg(), - ENCODE_NO_SHIFT, true); + OpRegRegRegShift(kOpAdd, branch_reg, branch_reg, disp_reg, ENCODE_NO_SHIFT); NewLIR1(kA64Br1x, branch_reg.GetReg()); // branch_over target here @@ -334,7 +332,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) if (!skip_overflow_check) { LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x12); - OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true); + OpRegImm64(kOpSub, rs_rA64_SP, frame_size_); if (Runtime::Current()->ExplicitStackOverflowChecks()) { /* Load stack limit */ // TODO(Arm64): fix the line below: @@ -348,7 +346,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) MarkPossibleStackOverflowException(); } } else if (frame_size_ > 0) { - OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true); + OpRegImm64(kOpSub, rs_rA64_SP, frame_size_); } /* Need to spill any FP regs? 
*/ @@ -391,7 +389,7 @@ void Arm64Mir2Lir::GenExitSequence() { UnSpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_); } - OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_, /*is_wide*/true); + OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_); NewLIR0(kA64Ret); } diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 350e4830ae..fddbfd79ac 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -93,6 +93,8 @@ class Arm64Mir2Lir : public Mir2Lir { RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE; // Required for target - Dalvik-level generators. + void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation lr_shift); void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, @@ -120,6 +122,8 @@ class Arm64Mir2Lir : public Mir2Lir { bool GenInlinedSqrt(CallInfo* info); bool GenInlinedPeek(CallInfo* info, OpSize size); bool GenInlinedPoke(CallInfo* info, OpSize size); + void GenIntToLong(RegLocation rl_dest, RegLocation rl_src); + void GenNotLong(RegLocation rl_dest, RegLocation rl_src); void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -127,6 +131,8 @@ class Arm64Mir2Lir : public Mir2Lir { RegLocation rl_src2); void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2, bool is_div); RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div); RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div); void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -170,7 +176,7 @@ class Arm64Mir2Lir : public Mir2Lir { LIR* OpReg(OpKind op, RegStorage r_dest_src); void OpRegCopy(RegStorage r_dest, RegStorage r_src); LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src); - LIR* OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value, bool is_wide); + LIR* OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value); LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value); LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset); LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2); @@ -191,8 +197,8 @@ class Arm64Mir2Lir : public Mir2Lir { LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size); LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size); - LIR* OpRegRegRegShift(OpKind op, int r_dest, int r_src1, int r_src2, int shift, - bool is_wide = false); + LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2, + int shift); LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift); static const ArmEncodingMap EncodingMap[kA64Last]; int EncodeShift(int code, int amount); diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index b0f5904db4..38f110ead2 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -53,10 +53,36 @@ void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, rl_result = 
EvalLoc(rl_dest, kCoreReg, true); OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); - NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq); - NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(), + NewLIR4(WIDE(kA64Csinc4rrrc), rl_result.reg.GetReg(), rxzr, rxzr, kArmCondEq); + NewLIR4(WIDE(kA64Csneg4rrrc), rl_result.reg.GetReg(), rl_result.reg.GetReg(), rl_result.reg.GetReg(), kArmCondLe); - StoreValue(rl_dest, rl_result); + StoreValueWide(rl_dest, rl_result); +} + +void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift) { + OpKind op = kOpBkpt; + switch (opcode) { + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + op = kOpLsl; + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + op = kOpAsr; + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + op = kOpLsr; + break; + default: + LOG(FATAL) << "Unexpected case: " << opcode; + } + rl_shift = LoadValueWide(rl_shift, kCoreReg); + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegRegReg(op, rl_result.reg, rl_src1.reg, rl_shift.reg); + StoreValueWide(rl_dest, rl_result); } void Arm64Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, @@ -69,7 +95,7 @@ void Arm64Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, LIR* branch = NewLIR2(WIDE(opcode), rl_src1.reg.GetLowReg(), 0); branch->target = taken; } else { - OpRegImm64(kOpCmp, rl_src1.reg, val, /*is_wide*/true); + OpRegImm64(kOpCmp, rl_src1.reg, val); OpCondBranch(ccode, taken); } } @@ -219,7 +245,8 @@ LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_ ArmConditionCode arm_cond = ArmConditionEncoding(cond); if (check_value == 0 && (arm_cond == kArmCondEq || arm_cond == kArmCondNe)) { ArmOpcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt; - branch = NewLIR2(opcode, reg.GetReg(), 0); + ArmOpcode wide = reg.Is64Bit() ? 
WIDE(0) : UNWIDE(0); + branch = NewLIR2(opcode | wide, reg.GetReg(), 0); } else { OpRegImm(kOpCmp, reg, check_value); branch = NewLIR2(kA64B2ct, arm_cond, 0); @@ -354,19 +381,16 @@ bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_d NewLIR4(kA64Smaddl4xwwx, r_lo.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg(), rxzr); switch (pattern) { case Divide3: - OpRegRegRegShift(kOpSub, rl_result.reg.GetReg(), r_hi.GetReg(), - rl_src.reg.GetReg(), EncodeShift(kA64Asr, 31)); + OpRegRegRegShift(kOpSub, rl_result.reg, r_hi, rl_src.reg, EncodeShift(kA64Asr, 31)); break; case Divide5: OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31); - OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(), - EncodeShift(kA64Asr, magic_table[lit].shift)); + OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi, EncodeShift(kA64Asr, magic_table[lit].shift)); break; case Divide7: OpRegReg(kOpAdd, r_hi, rl_src.reg); OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31); - OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(), - EncodeShift(kA64Asr, magic_table[lit].shift)); + OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi, EncodeShift(kA64Asr, magic_table[lit].shift)); break; default: LOG(FATAL) << "Unexpected pattern: " << pattern; @@ -405,25 +429,30 @@ RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int return rl_result; } -RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2, +RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegStorage r_src2, bool is_div) { + CHECK_EQ(r_src1.Is64Bit(), r_src2.Is64Bit()); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); if (is_div) { - // Simple case, use sdiv instruction. - OpRegRegReg(kOpDiv, rl_result.reg, reg1, reg2); + OpRegRegReg(kOpDiv, rl_result.reg, r_src1, r_src2); } else { - // Remainder case, use the following code: - // temp = reg1 / reg2 - integer division - // temp = temp * reg2 - // dest = reg1 - temp - - RegStorage temp = AllocTemp(); - OpRegRegReg(kOpDiv, temp, reg1, reg2); - OpRegReg(kOpMul, temp, reg2); - OpRegRegReg(kOpSub, rl_result.reg, reg1, temp); + // temp = r_src1 / r_src2 + // dest = r_src1 - temp * r_src2 + RegStorage temp; + ArmOpcode wide; + if (rl_result.reg.Is64Bit()) { + temp = AllocTempWide(); + wide = WIDE(0); + } else { + temp = AllocTemp(); + wide = UNWIDE(0); + } + OpRegRegReg(kOpDiv, temp, r_src1, r_src2); + NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(), + r_src1.GetReg(), r_src2.GetReg()); FreeTemp(temp); } - return rl_result; } @@ -684,17 +713,14 @@ LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) { void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, int first_bit, int second_bit) { - OpRegRegRegShift(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(), - EncodeShift(kA64Lsl, second_bit - first_bit)); + OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg, EncodeShift(kA64Lsl, second_bit - first_bit)); if (first_bit != 0) { OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit); } } void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg) { - DCHECK(reg.IsPair()); // TODO: support k64BitSolo. 
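The sdiv-plus-msub sequence in GenDivRem above computes the remainder without a runtime call, relying on the identity rem = a - (a / b) * b with a quotient truncated toward zero. A sketch of what the two generated instructions compute (illustrative only; b is already known to be non-zero at this point):

    //   q = a / b;       // sdiv: quotient, truncated toward zero
    //   r = a - q * b;   // msub: rd = ra - rn * rm
    // Example: a = -7, b = 2 gives q = -3 and r = -1, matching Java's -7 % 2 == -1.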
- OpRegImm64(kOpCmp, reg, 0, /*is_wide*/true); - GenDivZeroCheck(kCondEq); + LOG(FATAL) << "Unexpected use of GenDivZero for Arm64"; } // Test suspend flag, return target of taken suspend branch @@ -756,33 +782,51 @@ bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { #endif } -void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { - rl_src = LoadValueWide(rl_src, kCoreReg); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - RegStorage z_reg = AllocTemp(); - LoadConstantNoClobber(z_reg, 0); - // Check for destructive overlap - if (rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg()) { - RegStorage t_reg = AllocTemp(); - OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow()); - OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, t_reg); - FreeTemp(t_reg); - } else { - OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow()); - OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, rl_src.reg.GetHigh()); - } - FreeTemp(z_reg); +void Arm64Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) { + RegLocation rl_result; + + rl_src = LoadValue(rl_src, kCoreReg); + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 31); + StoreValueWide(rl_dest, rl_result); +} + +void Arm64Mir2Lir::GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2, bool is_div) { + RegLocation rl_result; + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + GenDivZeroCheck(rl_src2.reg); + rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, is_div); StoreValueWide(rl_dest, rl_result); } void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { RegLocation rl_result; + rl_src1 = LoadValueWide(rl_src1, kCoreReg); rl_src2 = LoadValueWide(rl_src2, kCoreReg); rl_result = EvalLocWide(rl_dest, kCoreReg, true); - OpRegRegRegShift(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), - ENCODE_NO_SHIFT, /*is_wide*/ true); + OpRegRegRegShift(op, rl_result.reg, rl_src1.reg, rl_src2.reg, ENCODE_NO_SHIFT); + StoreValueWide(rl_dest, rl_result); +} + +void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { + RegLocation rl_result; + + rl_src = LoadValueWide(rl_src, kCoreReg); + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegRegShift(kOpNeg, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT); + StoreValueWide(rl_dest, rl_result); +} + +void Arm64Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) { + RegLocation rl_result; + + rl_src = LoadValueWide(rl_src, kCoreReg); + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegRegShift(kOpMvn, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT); StoreValueWide(rl_dest, rl_result); } @@ -865,8 +909,7 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, } else { // No special indexed operation, lea + load w/ displacement reg_ptr = AllocTemp(); - OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(), - EncodeShift(kA64Lsl, scale)); + OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale)); FreeTemp(rl_index.reg); } rl_result = EvalLoc(rl_dest, reg_class, true); @@ -971,8 +1014,7 @@ void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, rl_src = LoadValue(rl_src, reg_class); } if (!constant_index) { - OpRegRegRegShift(kOpAdd, 
reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(), - EncodeShift(kA64Lsl, scale)); + OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale)); } if (needs_range_check) { if (constant_index) { @@ -1004,167 +1046,84 @@ void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, } } - void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) { - // TODO(Arm64): check this. - UNIMPLEMENTED(WARNING); - - rl_src = LoadValueWide(rl_src, kCoreReg); + OpKind op = kOpBkpt; // Per spec, we only care about low 6 bits of shift amount. int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f; + rl_src = LoadValueWide(rl_src, kCoreReg); if (shift_amount == 0) { StoreValueWide(rl_dest, rl_src); return; } - if (BadOverlap(rl_src, rl_dest)) { - GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift); - return; - } - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + + RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); switch (opcode) { case Instruction::SHL_LONG: case Instruction::SHL_LONG_2ADDR: - if (shift_amount == 1) { - OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), rl_src.reg.GetLow()); - OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), rl_src.reg.GetHigh()); - } else if (shift_amount == 32) { - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg); - LoadConstant(rl_result.reg.GetLow(), 0); - } else if (shift_amount > 31) { - OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetLow(), shift_amount - 32); - LoadConstant(rl_result.reg.GetLow(), 0); - } else { - OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount); - OpRegRegRegShift(kOpOr, rl_result.reg.GetHighReg(), rl_result.reg.GetHighReg(), rl_src.reg.GetLowReg(), - EncodeShift(kA64Lsr, 32 - shift_amount)); - OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount); - } + op = kOpLsl; break; case Instruction::SHR_LONG: case Instruction::SHR_LONG_2ADDR: - if (shift_amount == 32) { - OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); - OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31); - } else if (shift_amount > 31) { - OpRegRegImm(kOpAsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32); - OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31); - } else { - RegStorage t_reg = AllocTemp(); - OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount); - OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(), - EncodeShift(kA64Lsl, 32 - shift_amount)); - FreeTemp(t_reg); - OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount); - } + op = kOpAsr; break; case Instruction::USHR_LONG: case Instruction::USHR_LONG_2ADDR: - if (shift_amount == 32) { - OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); - LoadConstant(rl_result.reg.GetHigh(), 0); - } else if (shift_amount > 31) { - OpRegRegImm(kOpLsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32); - LoadConstant(rl_result.reg.GetHigh(), 0); - } else { - RegStorage t_reg = AllocTemp(); - OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount); - OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(), - EncodeShift(kA64Lsl, 32 - shift_amount)); - FreeTemp(t_reg); - OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount); - } + op = kOpLsr; break; default: 
LOG(FATAL) << "Unexpected case"; } + OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount); StoreValueWide(rl_dest, rl_result); } void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { - // TODO(Arm64): implement this. - UNIMPLEMENTED(WARNING); - - if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) { + if ((opcode == Instruction::SUB_LONG) || (opcode == Instruction::SUB_LONG_2ADDR)) { if (!rl_src2.is_const) { - // Don't bother with special handling for subtract from immediate. - GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); - return; + return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); } } else { - // Normalize + // Commutativity: put the constant operand in rl_src2. if (!rl_src2.is_const) { DCHECK(rl_src1.is_const); std::swap(rl_src1, rl_src2); } } - if (BadOverlap(rl_src1, rl_dest)) { - GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); - return; - } DCHECK(rl_src2.is_const); - // TODO(Arm64): implement this. - // int64_t val = mir_graph_->ConstantValueWide(rl_src2); - int32_t mod_imm_lo = -1; // ModifiedImmediate(val_lo); - int32_t mod_imm_hi = -1; // ModifiedImmediate(val_hi); - // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit + OpKind op = kOpBkpt; + int64_t val = mir_graph_->ConstantValueWide(rl_src2); + switch (opcode) { case Instruction::ADD_LONG: case Instruction::ADD_LONG_2ADDR: + op = kOpAdd; + break; case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: - if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) { - GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); - return; - } - break; - default: + op = kOpSub; break; - } - rl_src1 = LoadValueWide(rl_src1, kCoreReg); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
- switch (opcode) { -#if 0 - case Instruction::ADD_LONG: - case Instruction::ADD_LONG_2ADDR: - NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo); - NewLIR3(kThumb2AdcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi); + case Instruction::AND_LONG: + case Instruction::AND_LONG_2ADDR: + op = kOpAnd; break; case Instruction::OR_LONG: case Instruction::OR_LONG_2ADDR: - if ((val_lo != 0) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) { - OpRegRegImm(kOpOr, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo); - } - if ((val_hi != 0) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) { - OpRegRegImm(kOpOr, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi); - } + op = kOpOr; break; case Instruction::XOR_LONG: case Instruction::XOR_LONG_2ADDR: - OpRegRegImm(kOpXor, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo); - OpRegRegImm(kOpXor, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi); + op = kOpXor; break; - case Instruction::AND_LONG: - case Instruction::AND_LONG_2ADDR: - if ((val_lo != 0xffffffff) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) { - OpRegRegImm(kOpAnd, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo); - } - if ((val_hi != 0xffffffff) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) { - OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi); - } - break; - case Instruction::SUB_LONG_2ADDR: - case Instruction::SUB_LONG: - NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo); - NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi); - break; -#endif default: - LOG(FATAL) << "Unexpected opcode " << opcode; + LOG(FATAL) << "Unexpected opcode"; } + + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegRegImm(op, rl_result.reg, rl_src1.reg, val); StoreValueWide(rl_dest, rl_result); } diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 2b1c5e8386..808060d67c 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -606,7 +606,7 @@ void Arm64Mir2Lir::CompilerInitializeRegAlloc() { GrowableArray<RegisterInfo*>::Iterator fp_it(®_pool_->sp_regs_); for (RegisterInfo* info = fp_it.Next(); info != nullptr; info = fp_it.Next()) { int fp_reg_num = info->GetReg().GetRegNum(); - RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | fp_reg_num); + RegStorage dp_reg = RegStorage::FloatSolo64(fp_reg_num); RegisterInfo* dp_reg_info = GetRegInfo(dp_reg); // Double precision register's master storage should refer to itself. DCHECK_EQ(dp_reg_info, dp_reg_info->Master()); @@ -616,6 +616,20 @@ void Arm64Mir2Lir::CompilerInitializeRegAlloc() { DCHECK_EQ(info->StorageMask(), 0x1U); } + // Alias 32bit W registers to corresponding 64bit X registers. + GrowableArray<RegisterInfo*>::Iterator w_it(®_pool_->core_regs_); + for (RegisterInfo* info = w_it.Next(); info != nullptr; info = w_it.Next()) { + int x_reg_num = info->GetReg().GetRegNum(); + RegStorage x_reg = RegStorage::Solo64(x_reg_num); + RegisterInfo* x_reg_info = GetRegInfo(x_reg); + // 64bit X register's master storage should refer to itself. + DCHECK_EQ(x_reg_info, x_reg_info->Master()); + // Redirect 32bit W master storage to 64bit X. + info->SetMaster(x_reg_info); + // 32bit W should show a single 32-bit mask bit, at first referring to the low half. 
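With this aliasing in place, the allocator treats each 32-bit W register and its 64-bit X counterpart as one storage unit, so a write through either name invalidates a cached value held under the other. A rough picture of the invariants the loop establishes, for an illustrative register number (not taken from the patch):

    //   RegisterInfo* w3 = GetRegInfo(RegStorage::Solo32(3));
    //   RegisterInfo* x3 = GetRegInfo(RegStorage::Solo64(3));
    //   w3->Master() == x3           // the W record is redirected to the X master
    //   w3->StorageMask() == 0x1     // W occupies the low half of X's storage

The assertion that follows checks exactly that single low-half mask bit: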
+ DCHECK_EQ(info->StorageMask(), 0x1U); + } + // TODO: re-enable this when we can safely save r4 over the suspension code path. bool no_suspend = NO_SUSPEND; // || !Runtime::Current()->ExplicitSuspendChecks(); if (no_suspend) { diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index 39e9fad816..eca0d2fa82 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -408,7 +408,7 @@ LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r DCHECK_EQ(shift, ENCODE_NO_SHIFT); return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15); default: - return OpRegRegRegShift(op, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg(), shift); + return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift); } DCHECK(!IsPseudoLirOp(opcode)); @@ -445,8 +445,8 @@ LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, return NULL; } -LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, int r_dest, int r_src1, - int r_src2, int shift, bool is_wide) { +LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, + RegStorage r_src2, int shift) { ArmOpcode opcode = kA64Brk1d; switch (op) { @@ -500,21 +500,24 @@ LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, int r_dest, int r_src1, // The instructions above belong to two kinds: // - 4-operands instructions, where the last operand is a shift/extend immediate, // - 3-operands instructions with no shift/extend. - ArmOpcode widened_opcode = (is_wide) ? WIDE(opcode) : opcode; + ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode; + CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit()); + CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit()); if (EncodingMap[opcode].flags & IS_QUAD_OP) { DCHECK_EQ(shift, ENCODE_NO_SHIFT); - return NewLIR4(widened_opcode, r_dest, r_src1, r_src2, shift); + return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift); } else { DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP); DCHECK_EQ(shift, ENCODE_NO_SHIFT); - return NewLIR3(widened_opcode, r_dest, r_src1, r_src2); + return NewLIR3(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg()); } } LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) { - return OpRegRegRegShift(op, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), ENCODE_NO_SHIFT); + return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT); } +// TODO: should this be taking an int64_t value? LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) { LIR* res; bool neg = (value < 0); @@ -523,6 +526,7 @@ LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, ArmOpcode alt_opcode = kA64Brk1d; int32_t log_imm = -1; bool is_wide = r_dest.Is64Bit(); + CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit()); ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0); switch (op) { @@ -610,11 +614,11 @@ LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, } LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { - return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value), /*is_wide*/false); + return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value)); } -LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value, bool is_wide) { - ArmOpcode wide = (is_wide) ?
WIDE(0) : UNWIDE(0); +LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) { + ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0); ArmOpcode opcode = kA64Brk1d; ArmOpcode neg_opcode = kA64Brk1d; bool shift; diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 3ec31ba7d9..526c981ae9 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -35,15 +35,9 @@ namespace art { namespace { // anonymous namespace MIR* AllocReplacementMIR(MIRGraph* mir_graph, MIR* invoke, MIR* move_return) { - ArenaAllocator* arena = mir_graph->GetArena(); - MIR* insn = static_cast<MIR*>(arena->Alloc(sizeof(MIR), kArenaAllocMIR)); + MIR* insn = mir_graph->NewMIR(); insn->offset = invoke->offset; - insn->width = invoke->width; insn->optimization_flags = MIR_CALLEE; - if (move_return != nullptr) { - DCHECK_EQ(move_return->offset, invoke->offset + invoke->width); - insn->width += move_return->width; - } return insn; } @@ -660,7 +654,6 @@ bool DexFileMethodInliner::GenInlineIGet(MIRGraph* mir_graph, BasicBlock* bb, MI } MIR* insn = AllocReplacementMIR(mir_graph, invoke, move_result); - insn->width += insn->offset - invoke->offset; insn->offset = invoke->offset; insn->dalvikInsn.opcode = opcode; insn->dalvikInsn.vA = move_result->dalvikInsn.vA; @@ -737,9 +730,7 @@ bool DexFileMethodInliner::GenInlineIPut(MIRGraph* mir_graph, BasicBlock* bb, MI if (move_result != nullptr) { MIR* move = AllocReplacementMIR(mir_graph, invoke, move_result); - insn->width = invoke->width; move->offset = move_result->offset; - move->width = move_result->width; if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT) { move->dalvikInsn.opcode = Instruction::MOVE_FROM16; } else if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) { diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index de55a05568..7e3c8ce7e7 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -1595,7 +1595,7 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegReg(op, rl_result.reg, rl_src1.reg); } else { - if (shift_op) { + if ((shift_op) && (cu_->instruction_set != kArm64)) { rl_src2 = LoadValue(rl_src2, kCoreReg); RegStorage t_reg = AllocTemp(); OpRegRegImm(kOpAnd, t_reg, rl_src2.reg, 31); @@ -1613,7 +1613,7 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, StoreValue(rl_dest, rl_result); } else { bool done = false; // Set to true if we happen to find a way to use a real instruction. 
- if (cu_->instruction_set == kMips) { + if (cu_->instruction_set == kMips || cu_->instruction_set == kArm64) { rl_src1 = LoadValue(rl_src1, kCoreReg); rl_src2 = LoadValue(rl_src2, kCoreReg); if (check_zero) { @@ -1889,7 +1889,7 @@ void Mir2Lir::GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, Re } bool done = false; - if (cu_->instruction_set == kMips) { + if (cu_->instruction_set == kMips || cu_->instruction_set == kArm64) { rl_src = LoadValue(rl_src, kCoreReg); rl_result = GenDivRemLit(rl_dest, rl_src.reg, lit, is_div); done = true; @@ -1952,6 +1952,10 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc switch (opcode) { case Instruction::NOT_LONG: + if (cu->instruction_set == kArm64) { + mir_to_lir->GenNotLong(rl_dest, rl_src2); + return; + } rl_src2 = mir_to_lir->LoadValueWide(rl_src2, kCoreReg); rl_result = mir_to_lir->EvalLoc(rl_dest, kCoreReg, true); // Check for destructive overlap @@ -1998,6 +2002,10 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc break; case Instruction::DIV_LONG: case Instruction::DIV_LONG_2ADDR: + if (cu->instruction_set == kArm64) { + mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true); + return; + } call_out = true; check_zero = true; ret_reg = mir_to_lir->TargetReg(kRet0).GetReg(); @@ -2005,6 +2013,10 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc break; case Instruction::REM_LONG: case Instruction::REM_LONG_2ADDR: + if (cu->instruction_set == kArm64) { + mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false); + return; + } call_out = true; check_zero = true; func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pLmod); @@ -2014,7 +2026,8 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc break; case Instruction::AND_LONG_2ADDR: case Instruction::AND_LONG: - if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64) { + if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64 || + cu->instruction_set == kArm64) { return mir_to_lir->GenAndLong(opcode, rl_dest, rl_src1, rl_src2); } first_op = kOpAnd; @@ -2022,7 +2035,8 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc break; case Instruction::OR_LONG: case Instruction::OR_LONG_2ADDR: - if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64) { + if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64 || + cu->instruction_set == kArm64) { mir_to_lir->GenOrLong(opcode, rl_dest, rl_src1, rl_src2); return; } @@ -2031,7 +2045,8 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc break; case Instruction::XOR_LONG: case Instruction::XOR_LONG_2ADDR: - if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64) { + if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64 || + cu->instruction_set == kArm64) { mir_to_lir->GenXorLong(opcode, rl_dest, rl_src1, rl_src2); return; } diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index 2b57b35443..e46217337b 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -118,6 +118,7 @@ class MipsMir2Lir FINAL : public Mir2Lir { bool GenInlinedSqrt(CallInfo* info); bool GenInlinedPeek(CallInfo* info, OpSize size); bool GenInlinedPoke(CallInfo* info, OpSize size); + void GenNotLong(RegLocation rl_dest, RegLocation rl_src); void GenNegLong(RegLocation rl_dest, RegLocation rl_src); 
void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -125,6 +126,8 @@ class MipsMir2Lir FINAL : public Mir2Lir { RegLocation rl_src2); void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2, bool is_div); RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div); RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div); void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc index 55e93d7d78..beaf6bb8ea 100644 --- a/compiler/dex/quick/mips/int_mips.cc +++ b/compiler/dex/quick/mips/int_mips.cc @@ -431,6 +431,15 @@ void MipsMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, StoreValueWide(rl_dest, rl_result); } +void MipsMir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) { + LOG(FATAL) << "Unexpected use of GenNotLong()"; +} + +void MipsMir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2, bool is_div) { + LOG(FATAL) << "Unexpected use of GenDivRemLong()"; +} + void MipsMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { rl_src = LoadValueWide(rl_src, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 3584c33291..4cebb7cedb 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -775,7 +775,7 @@ class Mir2Lir : public Backend { RegLocation rl_src2, LIR* taken, LIR* fall_through); void GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, LIR* taken, LIR* fall_through); - void GenIntToLong(RegLocation rl_dest, RegLocation rl_src); + virtual void GenIntToLong(RegLocation rl_dest, RegLocation rl_src); void GenIntNarrowing(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src); void GenNewArray(uint32_t type_idx, RegLocation rl_dest, @@ -800,7 +800,7 @@ class Mir2Lir : public Backend { void GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_src); void GenLong3Addr(OpKind first_op, OpKind second_op, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); - void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, + virtual void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift); void GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src, int lit); @@ -1170,6 +1170,7 @@ class Mir2Lir : public Backend { virtual bool GenInlinedSqrt(CallInfo* info) = 0; virtual bool GenInlinedPeek(CallInfo* info, OpSize size) = 0; virtual bool GenInlinedPoke(CallInfo* info, OpSize size) = 0; + virtual void GenNotLong(RegLocation rl_dest, RegLocation rl_src) = 0; virtual void GenNegLong(RegLocation rl_dest, RegLocation rl_src) = 0; virtual void GenOrLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) = 0; @@ -1177,6 +1178,8 @@ class Mir2Lir : public Backend { RegLocation rl_src2) = 0; virtual void GenXorLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) = 0; + virtual void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2, bool is_div) = 0; virtual RegLocation
GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div) = 0; virtual RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 2c51c1f2fd..8c0f2bb7e2 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -447,8 +447,11 @@ RegStorage Mir2Lir::AllocLiveReg(int s_reg, int reg_class, bool wide) { reg = FindLiveReg(wide ? reg_pool_->dp_regs_ : reg_pool_->sp_regs_, s_reg); } if (!reg.Valid() && (reg_class != kFPReg)) { - // TODO: add 64-bit core pool similar to above. - reg = FindLiveReg(reg_pool_->core_regs_, s_reg); + if (Is64BitInstructionSet(cu_->instruction_set)) { + reg = FindLiveReg(wide ? reg_pool_->core64_regs_ : reg_pool_->core_regs_, s_reg); + } else { + reg = FindLiveReg(reg_pool_->core_regs_, s_reg); + } } if (reg.Valid()) { if (wide && !reg.IsFloat() && !Is64BitInstructionSet(cu_->instruction_set)) { diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 3070edd202..72cdbbd840 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -118,6 +118,7 @@ class X86Mir2Lir : public Mir2Lir { bool GenInlinedSqrt(CallInfo* info); bool GenInlinedPeek(CallInfo* info, OpSize size); bool GenInlinedPoke(CallInfo* info, OpSize size); + void GenNotLong(RegLocation rl_dest, RegLocation rl_src); void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -125,6 +126,8 @@ class X86Mir2Lir : public Mir2Lir { RegLocation rl_src2); void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2, bool is_div); // TODO: collapse reg_lo, reg_hi RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div); RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div); diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index a6ccc9959e..48bff6e6af 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -1372,6 +1372,15 @@ void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); } +void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) { + LOG(FATAL) << "Unexpected use of GenNotLong()"; +} + +void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2, bool is_div) { + LOG(FATAL) << "Unexpected use of GenDivRemLong()"; +} + void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { rl_src = LoadValueWide(rl_src, kCoreReg); RegLocation rl_result = ForceTempWide(rl_src); @@ -2189,6 +2198,10 @@ void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, } } rl_rhs = LoadValue(rl_rhs, kCoreReg); + // rl_rhs and rl_dest may be the same VR; in that case rl_dest is already + // in a register after LoadValue, while rl_result has not been refreshed + // yet, so re-query the location here. + rl_result = UpdateLocTyped(rl_dest, kCoreReg); if (rl_result.location != kLocPhysReg) { // Okay, we can do this into memory.
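The UpdateLocTyped call above guards a subtle aliasing case in the two-operand x86 path. A hypothetical bytecode that triggers it (illustrative, not taken from the patch):

    //   add-int v0, v1, v0    // rl_dest and rl_rhs both name v0
    // LoadValue(rl_rhs) promotes v0 into a register, so an rl_result captured
    // before that load would still claim v0 lives in memory. Re-querying the
    // location keeps the register path from being missed; only when the
    // destination genuinely stays in memory is the memory-form ALU op used: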
OpMemReg(op, rl_result, rl_rhs.reg.GetReg()); diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h index 3387c50e28..df21343884 100644 --- a/compiler/dex/reg_storage.h +++ b/compiler/dex/reg_storage.h @@ -312,7 +312,7 @@ class RegStorage { case k256BitSolo: return 32; case k512BitSolo: return 64; case k1024BitSolo: return 128; - default: LOG(FATAL) << "Unexpected shap"; + default: LOG(FATAL) << "Unexpected shape"; } return 0; } diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc index 5aa093a494..865311b084 100644 --- a/compiler/dex/ssa_transformation.cc +++ b/compiler/dex/ssa_transformation.cc @@ -557,8 +557,7 @@ void MIRGraph::InsertPhiNodes() { if (!phi_bb->data_flow_info->live_in_v->IsBitSet(dalvik_reg)) { continue; } - MIR *phi = - static_cast<MIR*>(arena_->Alloc(sizeof(MIR), kArenaAllocDFInfo)); + MIR *phi = NewMIR(); phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi); phi->dalvikInsn.vA = dalvik_reg; phi->offset = phi_bb->start_offset; diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index aafd801eab..e18902ffb2 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -20,6 +20,7 @@ #include "base/bit_field.h" #include "globals.h" #include "instruction_set.h" +#include "locations.h" #include "memory_region.h" #include "nodes.h" #include "utils/assembler.h" @@ -46,267 +47,6 @@ struct PcInfo { uintptr_t native_pc; }; -/** - * A Location is an abstraction over the potential location - * of an instruction. It could be in register or stack. - */ -class Location : public ValueObject { - public: - enum Kind { - kInvalid = 0, - kStackSlot = 1, // Word size slot. - kDoubleStackSlot = 2, // 64bit stack slot. - kRegister = 3, - // On 32bits architectures, quick can pass a long where the - // low bits are in the last parameter register, and the high - // bits are in a stack slot. The kQuickParameter kind is for - // handling this special case. - kQuickParameter = 4, - - // Unallocated location represents a location that is not fixed and can be - // allocated by a register allocator. Each unallocated location has - // a policy that specifies what kind of location is suitable. Payload - // contains register allocation policy. - kUnallocated = 5, - }; - - Location() : value_(kInvalid) { - DCHECK(!IsValid()); - } - - Location(const Location& other) : ValueObject(), value_(other.value_) {} - - Location& operator=(const Location& other) { - value_ = other.value_; - return *this; - } - - bool IsValid() const { - return value_ != kInvalid; - } - - // Register locations. - static Location RegisterLocation(ManagedRegister reg) { - return Location(kRegister, reg.RegId()); - } - - bool IsRegister() const { - return GetKind() == kRegister; - } - - ManagedRegister reg() const { - DCHECK(IsRegister()); - return static_cast<ManagedRegister>(GetPayload()); - } - - static uword EncodeStackIndex(intptr_t stack_index) { - DCHECK(-kStackIndexBias <= stack_index); - DCHECK(stack_index < kStackIndexBias); - return static_cast<uword>(kStackIndexBias + stack_index); - } - - static Location StackSlot(intptr_t stack_index) { - uword payload = EncodeStackIndex(stack_index); - Location loc(kStackSlot, payload); - // Ensure that sign is preserved. 
- DCHECK_EQ(loc.GetStackIndex(), stack_index); - return loc; - } - - bool IsStackSlot() const { - return GetKind() == kStackSlot; - } - - static Location DoubleStackSlot(intptr_t stack_index) { - uword payload = EncodeStackIndex(stack_index); - Location loc(kDoubleStackSlot, payload); - // Ensure that sign is preserved. - DCHECK_EQ(loc.GetStackIndex(), stack_index); - return loc; - } - - bool IsDoubleStackSlot() const { - return GetKind() == kDoubleStackSlot; - } - - intptr_t GetStackIndex() const { - DCHECK(IsStackSlot() || IsDoubleStackSlot()); - // Decode stack index manually to preserve sign. - return GetPayload() - kStackIndexBias; - } - - intptr_t GetHighStackIndex(uintptr_t word_size) const { - DCHECK(IsDoubleStackSlot()); - // Decode stack index manually to preserve sign. - return GetPayload() - kStackIndexBias + word_size; - } - - static Location QuickParameter(uint32_t parameter_index) { - return Location(kQuickParameter, parameter_index); - } - - uint32_t GetQuickParameterIndex() const { - DCHECK(IsQuickParameter()); - return GetPayload(); - } - - bool IsQuickParameter() const { - return GetKind() == kQuickParameter; - } - - arm::ArmManagedRegister AsArm() const; - x86::X86ManagedRegister AsX86() const; - - Kind GetKind() const { - return KindField::Decode(value_); - } - - bool Equals(Location other) const { - return value_ == other.value_; - } - - const char* DebugString() const { - switch (GetKind()) { - case kInvalid: return "?"; - case kRegister: return "R"; - case kStackSlot: return "S"; - case kDoubleStackSlot: return "DS"; - case kQuickParameter: return "Q"; - case kUnallocated: return "U"; - } - return "?"; - } - - // Unallocated locations. - enum Policy { - kAny, - kRequiresRegister, - kSameAsFirstInput, - }; - - bool IsUnallocated() const { - return GetKind() == kUnallocated; - } - - static Location UnallocatedLocation(Policy policy) { - return Location(kUnallocated, PolicyField::Encode(policy)); - } - - // Any free register is suitable to replace this unallocated location. - static Location Any() { - return UnallocatedLocation(kAny); - } - - static Location RequiresRegister() { - return UnallocatedLocation(kRequiresRegister); - } - - // The location of the first input to the instruction will be - // used to replace this unallocated location. - static Location SameAsFirstInput() { - return UnallocatedLocation(kSameAsFirstInput); - } - - Policy GetPolicy() const { - DCHECK(IsUnallocated()); - return PolicyField::Decode(GetPayload()); - } - - uword GetEncoding() const { - return GetPayload(); - } - - private: - // Number of bits required to encode Kind value. - static constexpr uint32_t kBitsForKind = 4; - static constexpr uint32_t kBitsForPayload = kWordSize * kBitsPerByte - kBitsForKind; - - explicit Location(uword value) : value_(value) {} - - Location(Kind kind, uword payload) - : value_(KindField::Encode(kind) | PayloadField::Encode(payload)) {} - - uword GetPayload() const { - return PayloadField::Decode(value_); - } - - typedef BitField<Kind, 0, kBitsForKind> KindField; - typedef BitField<uword, kBitsForKind, kBitsForPayload> PayloadField; - - // Layout for kUnallocated locations payload. - typedef BitField<Policy, 0, 3> PolicyField; - - // Layout for stack slots. - static const intptr_t kStackIndexBias = - static_cast<intptr_t>(1) << (kBitsForPayload - 1); - - // Location either contains kind and payload fields or a tagged handle for - // a constant locations. 
Values of enumeration Kind are selected in such a - // way that none of them can be interpreted as a kConstant tag. - uword value_; -}; - -/** - * The code generator computes LocationSummary for each instruction so that - * the instruction itself knows what code to generate: where to find the inputs - * and where to place the result. - * - * The intent is to have the code for generating the instruction independent of - * register allocation. A register allocator just has to provide a LocationSummary. - */ -class LocationSummary : public ArenaObject { - public: - explicit LocationSummary(HInstruction* instruction) - : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()), - temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0) { - inputs_.SetSize(instruction->InputCount()); - for (size_t i = 0; i < instruction->InputCount(); i++) { - inputs_.Put(i, Location()); - } - } - - void SetInAt(uint32_t at, Location location) { - inputs_.Put(at, location); - } - - Location InAt(uint32_t at) const { - return inputs_.Get(at); - } - - size_t GetInputCount() const { - return inputs_.Size(); - } - - void SetOut(Location location) { - output_ = Location(location); - } - - void AddTemp(Location location) { - temps_.Add(location); - } - - Location GetTemp(uint32_t at) const { - return temps_.Get(at); - } - - void SetTempAt(uint32_t at, Location location) { - temps_.Put(at, location); - } - - size_t GetTempCount() const { - return temps_.Size(); - } - - Location Out() const { return output_; } - - private: - GrowableArray<Location> inputs_; - GrowableArray<Location> temps_; - Location output_; - - DISALLOW_COPY_AND_ASSIGN(LocationSummary); -}; - class CodeGenerator : public ArenaObject { public: // Compiles the graph to executable instructions. Returns whether the compilation diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index be512323c0..f1b16a1d4a 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -793,5 +793,13 @@ void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unimplemented"; } +void LocationsBuilderARM::VisitParallelMove(HParallelMove* instruction) { + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorARM::VisitParallelMove(HParallelMove* instruction) { + LOG(FATAL) << "Unimplemented"; +} + } // namespace arm } // namespace art diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index e4f95c795f..b8b25f9886 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -813,5 +813,13 @@ void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unimplemented"; } +void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction) { + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) { + LOG(FATAL) << "Unimplemented"; +} + } // namespace x86 } // namespace art diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc new file mode 100644 index 0000000000..98766d2701 --- /dev/null +++ b/compiler/optimizing/locations.cc @@ -0,0 +1,32 @@ + /* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "locations.h" + +#include "nodes.h" + +namespace art { + +LocationSummary::LocationSummary(HInstruction* instruction) + : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()), + temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0) { + inputs_.SetSize(instruction->InputCount()); + for (size_t i = 0; i < instruction->InputCount(); i++) { + inputs_.Put(i, Location()); + } +} + +} // namespace art diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h new file mode 100644 index 0000000000..3c60d3cbe8 --- /dev/null +++ b/compiler/optimizing/locations.h @@ -0,0 +1,299 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_LOCATIONS_H_ +#define ART_COMPILER_OPTIMIZING_LOCATIONS_H_ + +#include "base/bit_field.h" +#include "utils/allocation.h" +#include "utils/growable_array.h" +#include "utils/managed_register.h" + +namespace art { + +class HInstruction; + +/** + * A Location is an abstraction over the potential location + * of an instruction. It could be in register or stack. + */ +class Location : public ValueObject { + public: + enum Kind { + kInvalid = 0, + kStackSlot = 1, // Word size slot. + kDoubleStackSlot = 2, // 64bit stack slot. + kRegister = 3, + // On 32bits architectures, quick can pass a long where the + // low bits are in the last parameter register, and the high + // bits are in a stack slot. The kQuickParameter kind is for + // handling this special case. + kQuickParameter = 4, + + // Unallocated location represents a location that is not fixed and can be + // allocated by a register allocator. Each unallocated location has + // a policy that specifies what kind of location is suitable. Payload + // contains register allocation policy. + kUnallocated = 5, + }; + + Location() : value_(kInvalid) { + DCHECK(!IsValid()); + } + + Location(const Location& other) : ValueObject(), value_(other.value_) {} + + Location& operator=(const Location& other) { + value_ = other.value_; + return *this; + } + + bool IsValid() const { + return value_ != kInvalid; + } + + bool IsInvalid() const { + return !IsValid(); + } + + bool IsConstant() const { + // TODO: support constants. + return false; + } + + // Empty location. Used when the location should be ignored. + static Location NoLocation() { + return Location(); + } + + // Register locations.
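  // A rough usage sketch of the factory methods that follow (the values and
  // the ManagedRegister are hypothetical); locations are plain tagged words,
  // cheap to copy and compare by value:
  //   Location r = Location::RegisterLocation(reg);  // kRegister, payload = reg id.
  //   Location s = Location::StackSlot(4);           // kStackSlot, biased index.
  //   CHECK(r.IsRegister());
  //   CHECK(s.IsStackSlot() && s.GetStackIndex() == 4);
  //   CHECK(!r.Equals(s));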
+ static Location RegisterLocation(ManagedRegister reg) { + return Location(kRegister, reg.RegId()); + } + + bool IsRegister() const { + return GetKind() == kRegister; + } + + ManagedRegister reg() const { + DCHECK(IsRegister()); + return static_cast<ManagedRegister>(GetPayload()); + } + + static uword EncodeStackIndex(intptr_t stack_index) { + DCHECK(-kStackIndexBias <= stack_index); + DCHECK(stack_index < kStackIndexBias); + return static_cast<uword>(kStackIndexBias + stack_index); + } + + static Location StackSlot(intptr_t stack_index) { + uword payload = EncodeStackIndex(stack_index); + Location loc(kStackSlot, payload); + // Ensure that sign is preserved. + DCHECK_EQ(loc.GetStackIndex(), stack_index); + return loc; + } + + bool IsStackSlot() const { + return GetKind() == kStackSlot; + } + + static Location DoubleStackSlot(intptr_t stack_index) { + uword payload = EncodeStackIndex(stack_index); + Location loc(kDoubleStackSlot, payload); + // Ensure that sign is preserved. + DCHECK_EQ(loc.GetStackIndex(), stack_index); + return loc; + } + + bool IsDoubleStackSlot() const { + return GetKind() == kDoubleStackSlot; + } + + intptr_t GetStackIndex() const { + DCHECK(IsStackSlot() || IsDoubleStackSlot()); + // Decode stack index manually to preserve sign. + return GetPayload() - kStackIndexBias; + } + + intptr_t GetHighStackIndex(uintptr_t word_size) const { + DCHECK(IsDoubleStackSlot()); + // Decode stack index manually to preserve sign. + return GetPayload() - kStackIndexBias + word_size; + } + + static Location QuickParameter(uint32_t parameter_index) { + return Location(kQuickParameter, parameter_index); + } + + uint32_t GetQuickParameterIndex() const { + DCHECK(IsQuickParameter()); + return GetPayload(); + } + + bool IsQuickParameter() const { + return GetKind() == kQuickParameter; + } + + arm::ArmManagedRegister AsArm() const; + x86::X86ManagedRegister AsX86() const; + + Kind GetKind() const { + return KindField::Decode(value_); + } + + bool Equals(Location other) const { + return value_ == other.value_; + } + + const char* DebugString() const { + switch (GetKind()) { + case kInvalid: return "?"; + case kRegister: return "R"; + case kStackSlot: return "S"; + case kDoubleStackSlot: return "DS"; + case kQuickParameter: return "Q"; + case kUnallocated: return "U"; + } + return "?"; + } + + // Unallocated locations. + enum Policy { + kAny, + kRequiresRegister, + kSameAsFirstInput, + }; + + bool IsUnallocated() const { + return GetKind() == kUnallocated; + } + + static Location UnallocatedLocation(Policy policy) { + return Location(kUnallocated, PolicyField::Encode(policy)); + } + + // Any free register is suitable to replace this unallocated location. + static Location Any() { + return UnallocatedLocation(kAny); + } + + static Location RequiresRegister() { + return UnallocatedLocation(kRequiresRegister); + } + + // The location of the first input to the instruction will be + // used to replace this unallocated location. + static Location SameAsFirstInput() { + return UnallocatedLocation(kSameAsFirstInput); + } + + Policy GetPolicy() const { + DCHECK(IsUnallocated()); + return PolicyField::Decode(GetPayload()); + } + + uword GetEncoding() const { + return GetPayload(); + } + + private: + // Number of bits required to encode Kind value. 
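  // A worked example of the encoding below, assuming a 32-bit uword: Kind
  // occupies bits [0,3] and the payload bits [4,31], so kBitsForPayload == 28
  // and kStackIndexBias == 1 << 27. StackSlot(-2) therefore stores payload
  // (1 << 27) - 2 == 0x7FFFFFE tagged with kStackSlot, and GetStackIndex()
  // subtracts the bias back out; that is how negative indices round-trip
  // through the unsigned payload field.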
+ static constexpr uint32_t kBitsForKind = 4; + static constexpr uint32_t kBitsForPayload = kWordSize * kBitsPerByte - kBitsForKind; + + explicit Location(uword value) : value_(value) {} + + Location(Kind kind, uword payload) + : value_(KindField::Encode(kind) | PayloadField::Encode(payload)) {} + + uword GetPayload() const { + return PayloadField::Decode(value_); + } + + typedef BitField<Kind, 0, kBitsForKind> KindField; + typedef BitField<uword, kBitsForKind, kBitsForPayload> PayloadField; + + // Layout for kUnallocated locations payload. + typedef BitField<Policy, 0, 3> PolicyField; + + // Layout for stack slots. + static const intptr_t kStackIndexBias = + static_cast<intptr_t>(1) << (kBitsForPayload - 1); + + // Location either contains kind and payload fields or a tagged handle for + // a constant locations. Values of enumeration Kind are selected in such a + // way that none of them can be interpreted as a kConstant tag. + uword value_; +}; + +/** + * The code generator computes LocationSummary for each instruction so that + * the instruction itself knows what code to generate: where to find the inputs + * and where to place the result. + * + * The intent is to have the code for generating the instruction independent of + * register allocation. A register allocator just has to provide a LocationSummary. + */ +class LocationSummary : public ArenaObject { + public: + explicit LocationSummary(HInstruction* instruction); + + void SetInAt(uint32_t at, Location location) { + inputs_.Put(at, location); + } + + Location InAt(uint32_t at) const { + return inputs_.Get(at); + } + + size_t GetInputCount() const { + return inputs_.Size(); + } + + void SetOut(Location location) { + output_ = Location(location); + } + + void AddTemp(Location location) { + temps_.Add(location); + } + + Location GetTemp(uint32_t at) const { + return temps_.Get(at); + } + + void SetTempAt(uint32_t at, Location location) { + temps_.Put(at, location); + } + + size_t GetTempCount() const { + return temps_.Size(); + } + + Location Out() const { return output_; } + + private: + GrowableArray<Location> inputs_; + GrowableArray<Location> temps_; + Location output_; + + DISALLOW_COPY_AND_ASSIGN(LocationSummary); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_LOCATIONS_H_ diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index afaedd7f18..74ba5208c4 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -291,6 +291,17 @@ bool HBasicBlock::Dominates(HBasicBlock* other) const { return false; } +void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) { + DCHECK(cursor->AsPhi() == nullptr); + DCHECK(instruction->AsPhi() == nullptr); + instruction->next_ = cursor; + instruction->previous_ = cursor->previous_; + cursor->previous_ = instruction; + if (GetFirstInstruction() == cursor) { + instructions_.first_instruction_ = instruction; + } +} + static void Add(HInstructionList* instruction_list, HBasicBlock* block, HInstruction* instruction) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index a2cb1c47ae..476f24e491 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_NODES_H_ #define ART_COMPILER_OPTIMIZING_NODES_H_ +#include "locations.h" #include "utils/allocation.h" #include "utils/arena_bit_vector.h" #include "utils/growable_array.h" @@ -315,6 +316,7 @@ class HBasicBlock : public ArenaObject { void AddInstruction(HInstruction* 
instruction); void RemoveInstruction(HInstruction* instruction); + void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); void AddPhi(HPhi* phi); void RemovePhi(HPhi* phi); @@ -383,6 +385,7 @@ class HBasicBlock : public ArenaObject { M(NewInstance) \ M(Not) \ M(ParameterValue) \ + M(ParallelMove) \ M(Phi) \ M(Return) \ M(ReturnVoid) \ @@ -1102,6 +1105,88 @@ class HPhi : public HInstruction { DISALLOW_COPY_AND_ASSIGN(HPhi); }; +class MoveOperands : public ArenaObject { + public: + MoveOperands(Location source, Location destination) + : source_(source), destination_(destination) {} + + Location GetSource() const { return source_; } + Location GetDestination() const { return destination_; } + + void SetSource(Location value) { source_ = value; } + void SetDestination(Location value) { destination_ = value; } + + // The parallel move resolver marks moves as "in-progress" by clearing the + // destination (but not the source). + Location MarkPending() { + DCHECK(!IsPending()); + Location dest = destination_; + destination_ = Location::NoLocation(); + return dest; + } + + void ClearPending(Location dest) { + DCHECK(IsPending()); + destination_ = dest; + } + + bool IsPending() const { + DCHECK(!source_.IsInvalid() || destination_.IsInvalid()); + return destination_.IsInvalid() && !source_.IsInvalid(); + } + + // True if this blocks a move from the given location. + bool Blocks(Location loc) const { + return !IsEliminated() && source_.Equals(loc); + } + + // A move is redundant if it's been eliminated, if its source and + // destination are the same, or if its destination is unneeded. + bool IsRedundant() const { + return IsEliminated() || destination_.IsInvalid() || source_.Equals(destination_); + } + + // We clear both operands to indicate move that's been eliminated. + void Eliminate() { + source_ = destination_ = Location::NoLocation(); + } + + bool IsEliminated() const { + DCHECK(!source_.IsInvalid() || destination_.IsInvalid()); + return source_.IsInvalid(); + } + + private: + Location source_; + Location destination_; + + DISALLOW_COPY_AND_ASSIGN(MoveOperands); +}; + +static constexpr size_t kDefaultNumberOfMoves = 4; + +class HParallelMove : public HTemplateInstruction<0> { + public: + explicit HParallelMove(ArenaAllocator* arena) : moves_(arena, kDefaultNumberOfMoves) {} + + void AddMove(MoveOperands* move) { + moves_.Add(move); + } + + MoveOperands* MoveOperandsAt(size_t index) const { + return moves_.Get(index); + } + + size_t NumMoves() const { return moves_.Size(); } + + DECLARE_INSTRUCTION(ParallelMove) + + private: + GrowableArray<MoveOperands*> moves_; + + DISALLOW_COPY_AND_ASSIGN(HParallelMove); +}; + class HGraphVisitor : public ValueObject { public: explicit HGraphVisitor(HGraph* graph) : graph_(graph) { } diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc new file mode 100644 index 0000000000..3d2d136ec3 --- /dev/null +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "parallel_move_resolver.h" +#include "nodes.h" +#include "locations.h" + +namespace art { + +void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) { + DCHECK(moves_.IsEmpty()); + // Build up a worklist of moves. + BuildInitialMoveList(parallel_move); + + for (size_t i = 0; i < moves_.Size(); ++i) { + const MoveOperands& move = *moves_.Get(i); + // Skip constants to perform them last. They don't block other moves + // and skipping such moves with register destinations keeps those + // registers free for the whole algorithm. + if (!move.IsEliminated() && !move.GetSource().IsConstant()) { + PerformMove(i); + } + } + + // Perform the moves with constant sources. + for (size_t i = 0; i < moves_.Size(); ++i) { + const MoveOperands& move = *moves_.Get(i); + if (!move.IsEliminated()) { + DCHECK(move.GetSource().IsConstant()); + EmitMove(i); + } + } + + moves_.Reset(); +} + + +void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { + // Perform a linear sweep of the moves to add them to the initial list of + // moves to perform, ignoring any move that is redundant (the source is + // the same as the destination, the destination is ignored and + // unallocated, or the move was already eliminated). + for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { + MoveOperands* move = parallel_move->MoveOperandsAt(i); + if (!move->IsRedundant()) { + moves_.Add(move); + } + } +} + + +void ParallelMoveResolver::PerformMove(size_t index) { + // Each call to this function performs a move and deletes it from the move + // graph. We first recursively perform any move blocking this one. We + // mark a move as "pending" on entry to PerformMove in order to detect + // cycles in the move graph. We use operand swaps to resolve cycles, + // which means that a call to PerformMove could change any source operand + // in the move graph. + + DCHECK(!moves_.Get(index)->IsPending()); + DCHECK(!moves_.Get(index)->IsRedundant()); + + // Clear this move's destination to indicate a pending move. The actual + // destination is saved in a stack-allocated local. Recursion may allow + // multiple moves to be pending. + DCHECK(!moves_.Get(index)->GetSource().IsInvalid()); + Location destination = moves_.Get(index)->MarkPending(); + + // Perform a depth-first traversal of the move graph to resolve + // dependencies. Any unperformed, unpending move with a source the same + // as this one's destination blocks this one so recursively perform all + // such moves. + for (size_t i = 0; i < moves_.Size(); ++i) { + const MoveOperands& other_move = *moves_.Get(i); + if (other_move.Blocks(destination) && !other_move.IsPending()) { + // Though PerformMove can change any source operand in the move graph, + // this call cannot create a blocking move via a swap (this loop does + // not miss any). Assume there is a non-blocking move with source A + // and this move is blocked on source B and there is a swap of A and + // B. Then A and B must be involved in the same cycle (or they would + // not be swapped). Since this move's destination is B and there is + // only a single incoming edge to an operand, this move must also be + // involved in the same cycle. In that case, the blocking move will + // be created but will be "pending" when we return from PerformMove. 
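        // A concrete trace of the recursion: for the two-move cycle
        // (0 -> 1, 1 -> 0), PerformMove(0) marks its move pending, then
        // recurses into (1 -> 0) because that move's source is the saved
        // destination 1. The recursive call in turn finds the pending
        // (0 -> 1) blocking its own destination, emits a single swap of 0
        // and 1, and both moves end up eliminated; this is the "(1 <-> 0)"
        // output asserted in parallel_move_test.cc.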
+ PerformMove(i); + } + } + MoveOperands* move = moves_.Get(index); + + // We are about to resolve this move and don't need it marked as + // pending, so restore its destination. + move->ClearPending(destination); + + // This move's source may have changed due to swaps to resolve cycles and + // so it may now be the last move in the cycle. If so remove it. + if (move->GetSource().Equals(destination)) { + move->Eliminate(); + return; + } + + // The move may be blocked on a (at most one) pending move, in which case + // we have a cycle. Search for such a blocking move and perform a swap to + // resolve it. + bool do_swap = false; + for (size_t i = 0; i < moves_.Size(); ++i) { + const MoveOperands& other_move = *moves_.Get(i); + if (other_move.Blocks(destination)) { + DCHECK(other_move.IsPending()); + do_swap = true; + break; + } + } + + if (do_swap) { + EmitSwap(index); + // Any unperformed (including pending) move with a source of either + // this move's source or destination needs to have their source + // changed to reflect the state of affairs after the swap. + Location source = move->GetSource(); + Location destination = move->GetDestination(); + move->Eliminate(); + for (size_t i = 0; i < moves_.Size(); ++i) { + const MoveOperands& other_move = *moves_.Get(i); + if (other_move.Blocks(source)) { + moves_.Get(i)->SetSource(destination); + } else if (other_move.Blocks(destination)) { + moves_.Get(i)->SetSource(source); + } + } + } else { + // This move is not blocked. + EmitMove(index); + move->Eliminate(); + } +} + +} // namespace art diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h new file mode 100644 index 0000000000..ff20cb0bc6 --- /dev/null +++ b/compiler/optimizing/parallel_move_resolver.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_ +#define ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_ + +#include "utils/allocation.h" +#include "utils/growable_array.h" + +namespace art { + +class HParallelMove; +class MoveOperands; + +/** + * Helper class to resolve a set of parallel moves. Architecture dependent code + * generator must have their own subclass that implements the `EmitMove` and `EmitSwap` + * operations. + */ +class ParallelMoveResolver : public ValueObject { + public: + explicit ParallelMoveResolver(ArenaAllocator* allocator) : moves_(allocator, 32) {} + virtual ~ParallelMoveResolver() {} + + // Resolve a set of parallel moves, emitting assembler instructions. + void EmitNativeCode(HParallelMove* parallel_move); + + protected: + // Emit a move. + virtual void EmitMove(size_t index) = 0; + + // Execute a move by emitting a swap of two operands. + virtual void EmitSwap(size_t index) = 0; + + // List of moves not yet resolved. + GrowableArray<MoveOperands*> moves_; + + private: + // Build the initial list of moves. 
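  // A code generator is expected to subclass this resolver and emit real
  // instructions; a minimal sketch (names hypothetical, see
  // TestParallelMoveResolver in parallel_move_test.cc for a concrete one):
  //   class X86ParallelMoveResolver : public ParallelMoveResolver {
  //    protected:
  //     void EmitMove(size_t index) { /* mov dst(index), src(index) */ }
  //     void EmitSwap(size_t index) { /* xchg dst(index), src(index) */ }
  //   };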
+ void BuildInitialMoveList(HParallelMove* parallel_move); + + // Perform the move at the moves_ index in question (possibly requiring + // other moves to satisfy dependencies). + void PerformMove(size_t index); + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_ diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc new file mode 100644 index 0000000000..88df24d9ac --- /dev/null +++ b/compiler/optimizing/parallel_move_test.cc @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nodes.h" +#include "parallel_move_resolver.h" +#include "utils/arena_allocator.h" + +#include "gtest/gtest.h" + +namespace art { + +class TestParallelMoveResolver : public ParallelMoveResolver { + public: + explicit TestParallelMoveResolver(ArenaAllocator* allocator) : ParallelMoveResolver(allocator) {} + + virtual void EmitMove(size_t index) { + MoveOperands* move = moves_.Get(index); + if (!message_.str().empty()) { + message_ << " "; + } + message_ << "(" + << move->GetSource().reg().RegId() + << " -> " + << move->GetDestination().reg().RegId() + << ")"; + } + + virtual void EmitSwap(size_t index) { + MoveOperands* move = moves_.Get(index); + if (!message_.str().empty()) { + message_ << " "; + } + message_ << "(" + << move->GetSource().reg().RegId() + << " <-> " + << move->GetDestination().reg().RegId() + << ")"; + } + + std::string GetMessage() const { + return message_.str(); + } + + private: + std::ostringstream message_; + + + DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolver); +}; + +static HParallelMove* BuildParallelMove(ArenaAllocator* allocator, + const size_t operands[][2], + size_t number_of_moves) { + HParallelMove* moves = new (allocator) HParallelMove(allocator); + for (size_t i = 0; i < number_of_moves; ++i) { + moves->AddMove(new (allocator) MoveOperands( + Location::RegisterLocation(ManagedRegister(operands[i][0])), + Location::RegisterLocation(ManagedRegister(operands[i][1])))); + } + return moves; +} + +TEST(ParallelMoveTest, Dependency) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + { + TestParallelMoveResolver resolver(&allocator); + static constexpr size_t moves[][2] = {{0, 1}, {1, 2}}; + resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); + ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str()); + } + + { + TestParallelMoveResolver resolver(&allocator); + static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {1, 4}}; + resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); + ASSERT_STREQ("(2 -> 3) (1 -> 2) (1 -> 4) (0 -> 1)", resolver.GetMessage().c_str()); + } +} + +TEST(ParallelMoveTest, Swap) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + { + TestParallelMoveResolver resolver(&allocator); + static constexpr size_t moves[][2] = {{0, 1}, {1, 0}}; + 
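    // 0 -> 1 and 1 -> 0 form a two-element cycle, so no emission order works
    // with plain moves; the resolver is expected to collapse the cycle into
    // the single register swap asserted below.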
resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); + ASSERT_STREQ("(1 <-> 0)", resolver.GetMessage().c_str()); + } + + { + TestParallelMoveResolver resolver(&allocator); + static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {1, 0}}; + resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); + ASSERT_STREQ("(1 -> 2) (1 <-> 0)", resolver.GetMessage().c_str()); + } + + { + TestParallelMoveResolver resolver(&allocator); + static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 1}}; + resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); + ASSERT_STREQ("(4 <-> 1) (3 <-> 4) (2 <-> 3) (0 -> 1)", resolver.GetMessage().c_str()); + } + + { + TestParallelMoveResolver resolver(&allocator); + static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 1}, {5, 4}}; + resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); + ASSERT_STREQ("(4 <-> 1) (3 <-> 4) (2 <-> 3) (0 -> 1) (5 -> 4)", resolver.GetMessage().c_str()); + } +} + +} // namespace art diff --git a/compiler/output_stream.h b/compiler/output_stream.h index 478a854f26..97ccc2caa6 100644 --- a/compiler/output_stream.h +++ b/compiler/output_stream.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OUTPUT_STREAM_H_ #include <stdint.h> +#include <sys/types.h> #include <string> diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk index 0ded2d8b10..03d32f01bc 100644 --- a/dalvikvm/Android.mk +++ b/dalvikvm/Android.mk @@ -51,6 +51,7 @@ LOCAL_SHARED_LIBRARIES := libnativehelper LOCAL_LDFLAGS := -ldl -lpthread LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk LOCAL_IS_HOST_MODULE := true +include external/libcxx/libcxx.mk include $(BUILD_HOST_EXECUTABLE) ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE) endif diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 9914875d5e..f0b575041d 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -33,7 +33,7 @@ #include "compiler.h" #include "compiler_callbacks.h" #include "dex_file-inl.h" -#include "dex/pass_driver.h" +#include "dex/pass_driver_me.h" #include "dex/verification_results.h" #include "driver/compiler_callbacks_impl.h" #include "driver/compiler_driver.h" @@ -918,10 +918,10 @@ static int dex2oat(int argc, char** argv) { } else if (option == "--no-profile-file") { // No profile } else if (option == "--print-pass-names") { - PassDriver::PrintPassNames(); + PassDriverME::PrintPassNames(); } else if (option.starts_with("--disable-passes=")) { std::string disable_passes = option.substr(strlen("--disable-passes=")).data(); - PassDriver::CreateDefaultPassList(disable_passes); + PassDriverME::CreateDefaultPassList(disable_passes); } else { Usage("Unknown argument %s", option.data()); } diff --git a/disassembler/Android.mk b/disassembler/Android.mk index dd4e9d5de7..814323cda2 100644 --- a/disassembler/Android.mk +++ b/disassembler/Android.mk @@ -87,8 +87,8 @@ define build-libart-disassembler LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk + include external/libcxx/libcxx.mk ifeq ($$(art_target_or_host),target) - include external/libcxx/libcxx.mk LOCAL_SHARED_LIBRARIES += libcutils libvixl include $(BUILD_SHARED_LIBRARY) else # host diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index dcae50284a..d38f37f0f3 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -76,6 +76,13 @@ static void usage() { " Example: --boot-image=/system/framework/boot.art\n" "\n"); 
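  // A sample invocation exercising the new flag (paths hypothetical):
  //   oatdump --image=/system/framework/boot.art --instruction-set=arm
  // An image *location* alone is ambiguous once images exist per ISA; the
  // flag selects the concrete image file, and the default is the ISA the
  // oatdump binary itself runs on (kRuntimeISA).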
fprintf(stderr, + " --instruction-set=(arm|arm64|mips|x86|x86_64): for locating the image file based on the image location\n" + " set.\n" + " Example: --instruction-set=x86\n" + " Default: %s\n" + "\n", + GetInstructionSetString(kRuntimeISA)); + fprintf(stderr, " --output=<file> may be used to send the output to a file.\n" " Example: --output=/tmp/oatdump.txt\n" "\n"); @@ -1461,8 +1468,9 @@ static int oatdump(int argc, char** argv) { } const char* oat_filename = NULL; - const char* image_filename = NULL; - const char* boot_image_filename = NULL; + const char* image_location = NULL; + const char* boot_image_location = NULL; + InstructionSet instruction_set = kRuntimeISA; std::string elf_filename_prefix; std::ostream* os = &std::cout; std::unique_ptr<std::ofstream> out; @@ -1474,9 +1482,22 @@ static int oatdump(int argc, char** argv) { if (option.starts_with("--oat-file=")) { oat_filename = option.substr(strlen("--oat-file=")).data(); } else if (option.starts_with("--image=")) { - image_filename = option.substr(strlen("--image=")).data(); + image_location = option.substr(strlen("--image=")).data(); } else if (option.starts_with("--boot-image=")) { - boot_image_filename = option.substr(strlen("--boot-image=")).data(); + boot_image_location = option.substr(strlen("--boot-image=")).data(); + } else if (option.starts_with("--instruction-set=")) { + StringPiece instruction_set_str = option.substr(strlen("--instruction-set=")).data(); + if (instruction_set_str == "arm") { + instruction_set = kThumb2; + } else if (instruction_set_str == "arm64") { + instruction_set = kArm64; + } else if (instruction_set_str == "mips") { + instruction_set = kMips; + } else if (instruction_set_str == "x86") { + instruction_set = kX86; + } else if (instruction_set_str == "x86_64") { + instruction_set = kX86_64; + } } else if (option.starts_with("--dump:")) { if (option == "--dump:raw_mapping_table") { dump_raw_mapping_table = true; @@ -1500,12 +1521,12 @@ static int oatdump(int argc, char** argv) { } } - if (image_filename == NULL && oat_filename == NULL) { + if (image_location == NULL && oat_filename == NULL) { fprintf(stderr, "Either --image or --oat must be specified\n"); return EXIT_FAILURE; } - if (image_filename != NULL && oat_filename != NULL) { + if (image_location != NULL && oat_filename != NULL) { fprintf(stderr, "Either --image or --oat must be specified but not both\n"); return EXIT_FAILURE; } @@ -1533,16 +1554,19 @@ static int oatdump(int argc, char** argv) { NoopCompilerCallbacks callbacks; options.push_back(std::make_pair("compilercallbacks", &callbacks)); - if (boot_image_filename != NULL) { + if (boot_image_location != NULL) { boot_image_option += "-Ximage:"; - boot_image_option += boot_image_filename; + boot_image_option += boot_image_location; options.push_back(std::make_pair(boot_image_option.c_str(), reinterpret_cast<void*>(NULL))); } - if (image_filename != NULL) { + if (image_location != NULL) { image_option += "-Ximage:"; - image_option += image_filename; + image_option += image_location; options.push_back(std::make_pair(image_option.c_str(), reinterpret_cast<void*>(NULL))); } + options.push_back( + std::make_pair("imageinstructionset", + reinterpret_cast<const void*>(GetInstructionSetString(instruction_set)))); if (!Runtime::Create(options, false)) { fprintf(stderr, "Failed to create runtime\n"); @@ -1558,7 +1582,7 @@ static int oatdump(int argc, char** argv) { CHECK(image_space != NULL); const ImageHeader& image_header = image_space->GetImageHeader(); if (!image_header.IsValid()) { - 
fprintf(stderr, "Invalid image header %s\n", image_filename); + fprintf(stderr, "Invalid image header %s\n", image_location); return EXIT_FAILURE; } ImageDumper image_dumper(os, *image_space, image_header, diff --git a/runtime/Android.mk b/runtime/Android.mk index 1521caa05f..c2507b1457 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -308,6 +308,12 @@ ifeq ($(ART_USE_PORTABLE_COMPILER),true) LIBART_CFLAGS += -DART_USE_PORTABLE_COMPILER=1 endif +ifeq ($(MALLOC_IMPL),jemalloc) + LIBART_CFLAGS += -DUSE_JEMALLOC +else + LIBART_CFLAGS += -DUSE_DLMALLOC +endif + # $(1): target or host # $(2): ndebug or debug # $(3): true or false for LOCAL_CLANG @@ -397,12 +403,8 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT endif LOCAL_C_INCLUDES += $(ART_C_INCLUDES) LOCAL_SHARED_LIBRARIES += liblog libnativehelper - ifeq ($$(art_target_or_host),target) - include external/libcxx/libcxx.mk - LOCAL_SHARED_LIBRARIES += libbacktrace_libc++ - else - LOCAL_SHARED_LIBRARIES += libbacktrace - endif + include external/libcxx/libcxx.mk + LOCAL_SHARED_LIBRARIES += libbacktrace_libc++ ifeq ($$(art_target_or_host),target) LOCAL_SHARED_LIBRARIES += libcutils libdl libselinux libutils LOCAL_STATIC_LIBRARIES := libziparchive libz diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc index 2a5c7d1e8e..cb9f53b72a 100644 --- a/runtime/arch/arm64/entrypoints_init_arm64.cc +++ b/runtime/arch/arm64/entrypoints_init_arm64.cc @@ -84,12 +84,6 @@ extern "C" float fmodf(float a, float b); // REM_FLOAT[_2ADDR] // Double-precision FP arithmetics. extern "C" double fmod(double a, double b); // REM_DOUBLE[_2ADDR] -// Long long arithmetics - REM_LONG[_2ADDR] and DIV_LONG[_2ADDR] -extern "C" int64_t art_quick_mul_long(int64_t, int64_t); -extern "C" uint64_t art_quick_shl_long(uint64_t, uint32_t); -extern "C" uint64_t art_quick_shr_long(uint64_t, uint32_t); -extern "C" uint64_t art_quick_ushr_long(uint64_t, uint32_t); - // Intrinsic entrypoints. 
extern "C" int32_t __memcmp16(void*, void*, int32_t); extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t); @@ -199,10 +193,10 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, qpoints->pF2l = NULL; qpoints->pLdiv = NULL; qpoints->pLmod = NULL; - qpoints->pLmul = art_quick_mul_long; - qpoints->pShlLong = art_quick_shl_long; - qpoints->pShrLong = art_quick_shr_long; - qpoints->pUshrLong = art_quick_ushr_long; + qpoints->pLmul = NULL; + qpoints->pShlLong = NULL; + qpoints->pShrLong = NULL; + qpoints->pUshrLong = NULL; // Intrinsics qpoints->pIndexOf = art_quick_indexof; diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index ac922ddecd..7f31fb6881 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1611,10 +1611,6 @@ END art_quick_to_interpreter_bridge UNIMPLEMENTED art_quick_instrumentation_entry UNIMPLEMENTED art_quick_instrumentation_exit UNIMPLEMENTED art_quick_deoptimize -UNIMPLEMENTED art_quick_mul_long -UNIMPLEMENTED art_quick_shl_long -UNIMPLEMENTED art_quick_shr_long -UNIMPLEMENTED art_quick_ushr_long UNIMPLEMENTED art_quick_indexof /* diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc index 26cd8646d8..9f36927877 100644 --- a/runtime/arch/x86/thread_x86.cc +++ b/runtime/arch/x86/thread_x86.cc @@ -40,10 +40,9 @@ struct descriptor_table_entry_t { namespace art { -static Mutex modify_ldt_lock("modify_ldt lock"); - void Thread::InitCpu() { - MutexLock mu(Thread::Current(), modify_ldt_lock); + // Take the ldt lock, Thread::Current isn't yet established. + MutexLock mu(nullptr, *Locks::modify_ldt_lock_); const uintptr_t base = reinterpret_cast<uintptr_t>(this); const size_t limit = kPageSize; @@ -138,7 +137,7 @@ void Thread::InitCpu() { } void Thread::CleanupCpu() { - MutexLock mu(Thread::Current(), modify_ldt_lock); + MutexLock mu(this, *Locks::modify_ldt_lock_); // Sanity check that reads from %fs point to this Thread*. Thread* self_check; diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h index adf4c66aa4..6c415e77e3 100644 --- a/runtime/base/mutex-inl.h +++ b/runtime/base/mutex-inl.h @@ -132,9 +132,21 @@ static inline void CheckUnattachedThread(LockLevel level) NO_THREAD_SAFETY_ANALY // TODO: tighten this check. if (kDebugLocking) { Runtime* runtime = Runtime::Current(); - CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDownLocked() || - level == kDefaultMutexLevel || level == kRuntimeShutdownLock || - level == kThreadListLock || level == kLoggingLock || level == kAbortLock); + CHECK(runtime == nullptr || !runtime->IsStarted() || runtime->IsShuttingDownLocked() || + // Used during thread creation to avoid races with runtime shutdown. Thread::Current not + // yet established. + level == kRuntimeShutdownLock || + // Thread Ids are allocated/released before threads are established. + level == kAllocatedThreadIdsLock || + // Thread LDT's are initialized without Thread::Current established. + level == kModifyLdtLock || + // Threads are unregistered while holding the thread list lock, during this process they + // no longer exist and so we expect an unlock with no self. + level == kThreadListLock || + // Ignore logging which may or may not have set up thread data structures. + level == kLoggingLock || + // Avoid recursive death. 
+ level == kAbortLock); } } diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc index 6f7f2c1e99..705be40769 100644 --- a/runtime/base/mutex.cc +++ b/runtime/base/mutex.cc @@ -30,10 +30,12 @@ namespace art { Mutex* Locks::abort_lock_ = nullptr; +Mutex* Locks::allocated_thread_ids_lock_ = nullptr; Mutex* Locks::breakpoint_lock_ = nullptr; ReaderWriterMutex* Locks::classlinker_classes_lock_ = nullptr; ReaderWriterMutex* Locks::heap_bitmap_lock_ = nullptr; Mutex* Locks::logging_lock_ = nullptr; +Mutex* Locks::modify_ldt_lock_ = nullptr; ReaderWriterMutex* Locks::mutator_lock_ = nullptr; Mutex* Locks::runtime_shutdown_lock_ = nullptr; Mutex* Locks::thread_list_lock_ = nullptr; @@ -814,7 +816,13 @@ void ConditionVariable::TimedWait(Thread* self, int64_t ms, int32_t ns) { void Locks::Init() { if (logging_lock_ != nullptr) { // Already initialized. + if (kRuntimeISA == kX86) { + DCHECK(modify_ldt_lock_ != nullptr); + } else { + DCHECK(modify_ldt_lock_ == nullptr); + } DCHECK(abort_lock_ != nullptr); + DCHECK(allocated_thread_ids_lock_ != nullptr); DCHECK(breakpoint_lock_ != nullptr); DCHECK(classlinker_classes_lock_ != nullptr); DCHECK(heap_bitmap_lock_ != nullptr); @@ -827,32 +835,76 @@ void Locks::Init() { DCHECK(unexpected_signal_lock_ != nullptr); DCHECK(intern_table_lock_ != nullptr); } else { - logging_lock_ = new Mutex("logging lock", kLoggingLock, true); - abort_lock_ = new Mutex("abort lock", kAbortLock, true); + // Create global locks in level order from highest lock level to lowest. + LockLevel current_lock_level = kMutatorLock; + DCHECK(mutator_lock_ == nullptr); + mutator_lock_ = new ReaderWriterMutex("mutator lock", current_lock_level); - DCHECK(breakpoint_lock_ == nullptr); - breakpoint_lock_ = new Mutex("breakpoint lock", kBreakpointLock); - DCHECK(classlinker_classes_lock_ == nullptr); - classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock", - kClassLinkerClassesLock); + #define UPDATE_CURRENT_LOCK_LEVEL(new_level) \ + DCHECK_LT(new_level, current_lock_level); \ + current_lock_level = new_level; + + UPDATE_CURRENT_LOCK_LEVEL(kHeapBitmapLock); DCHECK(heap_bitmap_lock_ == nullptr); - heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", kHeapBitmapLock); - DCHECK(mutator_lock_ == nullptr); - mutator_lock_ = new ReaderWriterMutex("mutator lock", kMutatorLock); + heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", current_lock_level); + + UPDATE_CURRENT_LOCK_LEVEL(kRuntimeShutdownLock); DCHECK(runtime_shutdown_lock_ == nullptr); - runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", kRuntimeShutdownLock); + runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", current_lock_level); + + UPDATE_CURRENT_LOCK_LEVEL(kProfilerLock); + DCHECK(profiler_lock_ == nullptr); + profiler_lock_ = new Mutex("profiler lock", current_lock_level); + + UPDATE_CURRENT_LOCK_LEVEL(kTraceLock); + DCHECK(trace_lock_ == nullptr); + trace_lock_ = new Mutex("trace lock", current_lock_level); + + UPDATE_CURRENT_LOCK_LEVEL(kThreadListLock); DCHECK(thread_list_lock_ == nullptr); - thread_list_lock_ = new Mutex("thread list lock", kThreadListLock); + thread_list_lock_ = new Mutex("thread list lock", current_lock_level); + + UPDATE_CURRENT_LOCK_LEVEL(kBreakpointLock); + DCHECK(breakpoint_lock_ == nullptr); + breakpoint_lock_ = new Mutex("breakpoint lock", current_lock_level); + + UPDATE_CURRENT_LOCK_LEVEL(kClassLinkerClassesLock); + DCHECK(classlinker_classes_lock_ == nullptr); + classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock", 
+ current_lock_level); + + UPDATE_CURRENT_LOCK_LEVEL(kAllocatedThreadIdsLock); + DCHECK(allocated_thread_ids_lock_ == nullptr); + allocated_thread_ids_lock_ = new Mutex("allocated thread ids lock", current_lock_level); + + if (kRuntimeISA == kX86) { + UPDATE_CURRENT_LOCK_LEVEL(kModifyLdtLock); + DCHECK(modify_ldt_lock_ == nullptr); + modify_ldt_lock_ = new Mutex("modify_ldt lock", current_lock_level); + } + + UPDATE_CURRENT_LOCK_LEVEL(kInternTableLock); + DCHECK(intern_table_lock_ == nullptr); + intern_table_lock_ = new Mutex("InternTable lock", current_lock_level); + + + UPDATE_CURRENT_LOCK_LEVEL(kAbortLock); + DCHECK(abort_lock_ == nullptr); + abort_lock_ = new Mutex("abort lock", current_lock_level, true); + + UPDATE_CURRENT_LOCK_LEVEL(kThreadSuspendCountLock); DCHECK(thread_suspend_count_lock_ == nullptr); - thread_suspend_count_lock_ = new Mutex("thread suspend count lock", kThreadSuspendCountLock); - DCHECK(trace_lock_ == nullptr); - trace_lock_ = new Mutex("trace lock", kTraceLock); - DCHECK(profiler_lock_ == nullptr); - profiler_lock_ = new Mutex("profiler lock", kProfilerLock); + thread_suspend_count_lock_ = new Mutex("thread suspend count lock", current_lock_level); + + UPDATE_CURRENT_LOCK_LEVEL(kUnexpectedSignalLock); DCHECK(unexpected_signal_lock_ == nullptr); - unexpected_signal_lock_ = new Mutex("unexpected signal lock", kUnexpectedSignalLock, true); - DCHECK(intern_table_lock_ == nullptr); - intern_table_lock_ = new Mutex("InternTable lock", kInternTableLock); + unexpected_signal_lock_ = new Mutex("unexpected signal lock", current_lock_level, true); + + UPDATE_CURRENT_LOCK_LEVEL(kLoggingLock); + DCHECK(logging_lock_ == nullptr); + logging_lock_ = new Mutex("logging lock", current_lock_level, true); + + #undef UPDATE_CURRENT_LOCK_LEVEL } } diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index e13c8d5d62..522692e6f3 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -74,6 +74,8 @@ enum LockLevel { kPinTableLock, kLoadLibraryLock, kJdwpObjectRegistryLock, + kModifyLdtLock, + kAllocatedThreadIdsLock, kClassLinkerClassesLock, kBreakpointLock, kMonitorLock, @@ -532,28 +534,34 @@ class Locks { // Guards shutdown of the runtime. static Mutex* runtime_shutdown_lock_ ACQUIRED_AFTER(heap_bitmap_lock_); + // Guards background profiler global state. + static Mutex* profiler_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_); + + // Guards trace (ie traceview) requests. + static Mutex* trace_lock_ ACQUIRED_AFTER(profiler_lock_); + // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads // attaching and detaching. - static Mutex* thread_list_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_); + static Mutex* thread_list_lock_ ACQUIRED_AFTER(trace_lock_); // Guards breakpoints. static Mutex* breakpoint_lock_ ACQUIRED_AFTER(thread_list_lock_); - // Guards trace requests. - static Mutex* trace_lock_ ACQUIRED_AFTER(breakpoint_lock_); - - // Guards profile objects. - static Mutex* profiler_lock_ ACQUIRED_AFTER(trace_lock_); - // Guards lists of classes within the class linker. - static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(profiler_lock_); + static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(breakpoint_lock_); // When declaring any Mutex add DEFAULT_MUTEX_ACQUIRED_AFTER to use annotalysis to check the code // doesn't try to hold a higher level Mutex. #define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(Locks::classlinker_classes_lock_) + // Guard the allocation/deallocation of thread ids. 
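  // Taken together with the level-ordered construction in Locks::Init()
  // (every UPDATE_CURRENT_LOCK_LEVEL step DCHECKs that levels strictly
  // decrease), these ACQUIRED_AFTER chains pin down one global acquisition
  // order; a misordered lock then surfaces as a startup DCHECK or an
  // annotalysis warning rather than as a runtime deadlock.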
+ static Mutex* allocated_thread_ids_lock_ ACQUIRED_AFTER(classlinker_classes_lock_); + + // Guards modification of the LDT on x86. + static Mutex* modify_ldt_lock_ ACQUIRED_AFTER(allocated_thread_ids_lock_); + // Guards intern table. - static Mutex* intern_table_lock_ ACQUIRED_AFTER(classlinker_classes_lock_); + static Mutex* intern_table_lock_ ACQUIRED_AFTER(modify_ldt_lock_); // Have an exclusive aborting thread. static Mutex* abort_lock_ ACQUIRED_AFTER(classlinker_classes_lock_); diff --git a/runtime/debugger.cc b/runtime/debugger.cc index 7136c67b6f..8c8a3556da 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -4039,7 +4039,11 @@ void Dbg::DdmSendHeapSegments(bool native) { // Send a series of heap segment chunks. HeapChunkContext context((what == HPSG_WHAT_MERGED_OBJECTS), native); if (native) { +#ifdef USE_DLMALLOC dlmalloc_inspect_all(HeapChunkContext::HeapChunkCallback, &context); +#else + UNIMPLEMENTED(WARNING) << "Native heap inspection is only supported with dlmalloc"; +#endif } else { gc::Heap* heap = Runtime::Current()->GetHeap(); const std::vector<gc::space::ContinuousSpace*>& spaces = heap->GetContinuousSpaces(); diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index ea1ccdd665..fdc43671ce 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -893,10 +893,16 @@ void Heap::Trim() { uint64_t gc_heap_end_ns = NanoTime(); // We never move things in the native heap, so we can finish the GC at this point. FinishGC(self, collector::kGcTypeNone); + size_t native_reclaimed = 0; +#if defined(USE_DLMALLOC) // Trim the native heap. dlmalloc_trim(0); - size_t native_reclaimed = 0; dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed); +#elif defined(USE_JEMALLOC) + // Jemalloc does it's own internal trimming. +#else + UNIMPLEMENTED(WARNING) << "Add trimming support"; +#endif uint64_t end_ns = NanoTime(); VLOG(heap) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns) << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration=" diff --git a/runtime/monitor.cc b/runtime/monitor.cc index f783edbfc3..c53520de12 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -111,7 +111,7 @@ bool Monitor::Install(Thread* self) { MutexLock mu(self, monitor_lock_); // Uncontended mutex acquisition as monitor isn't yet public. CHECK(owner_ == nullptr || owner_ == self || owner_->IsSuspended()); // Propagate the lock state. - LockWord lw(obj_->GetLockWord(false)); + LockWord lw(GetObject()->GetLockWord(false)); switch (lw.GetState()) { case LockWord::kThinLocked: { CHECK_EQ(owner_->GetThreadId(), lw.ThinLockOwner()); @@ -137,7 +137,7 @@ bool Monitor::Install(Thread* self) { } LockWord fat(this); // Publish the updated lock word, which may race with other threads. - bool success = obj_->CasLockWord(lw, fat); + bool success = GetObject()->CasLockWord(lw, fat); // Lock profiling. if (success && owner_ != nullptr && lock_profiling_threshold_ != 0) { locking_method_ = owner_->GetCurrentMethod(&locking_dex_pc_); @@ -226,9 +226,9 @@ void Monitor::Lock(Thread* self) { // Do this before releasing the lock so that we don't get deflated. ++num_waiters_; monitor_lock_.Unlock(self); // Let go of locks in order. + self->SetMonitorEnterObject(GetObject()); { ScopedThreadStateChange tsc(self, kBlocked); // Change to blocked and give up mutator_lock_. - self->SetMonitorEnterObject(obj_); MutexLock mu2(self, monitor_lock_); // Reacquire monitor_lock_ without mutator_lock_ for Wait. if (owner_ != NULL) { // Did the owner_ give the lock up? 
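          // A note on the reordering above: SetMonitorEnterObject(GetObject())
          // now runs before the ScopedThreadStateChange to kBlocked and is
          // cleared only after it, presumably because GetObject() goes through
          // a read barrier that must execute while the thread is still
          // runnable; it also keeps the advertised "blocked on" object
          // published for the entire blocked window.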
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index f783edbfc3..c53520de12 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -111,7 +111,7 @@ bool Monitor::Install(Thread* self) {
   MutexLock mu(self, monitor_lock_);  // Uncontended mutex acquisition as monitor isn't yet public.
   CHECK(owner_ == nullptr || owner_ == self || owner_->IsSuspended());
   // Propagate the lock state.
-  LockWord lw(obj_->GetLockWord(false));
+  LockWord lw(GetObject()->GetLockWord(false));
   switch (lw.GetState()) {
     case LockWord::kThinLocked: {
       CHECK_EQ(owner_->GetThreadId(), lw.ThinLockOwner());
@@ -137,7 +137,7 @@ bool Monitor::Install(Thread* self) {
   }
   LockWord fat(this);
   // Publish the updated lock word, which may race with other threads.
-  bool success = obj_->CasLockWord(lw, fat);
+  bool success = GetObject()->CasLockWord(lw, fat);
   // Lock profiling.
   if (success && owner_ != nullptr && lock_profiling_threshold_ != 0) {
     locking_method_ = owner_->GetCurrentMethod(&locking_dex_pc_);
@@ -226,9 +226,9 @@ void Monitor::Lock(Thread* self) {
     // Do this before releasing the lock so that we don't get deflated.
     ++num_waiters_;
     monitor_lock_.Unlock(self);  // Let go of locks in order.
+    self->SetMonitorEnterObject(GetObject());
     {
       ScopedThreadStateChange tsc(self, kBlocked);  // Change to blocked and give up mutator_lock_.
-      self->SetMonitorEnterObject(obj_);
       MutexLock mu2(self, monitor_lock_);  // Reacquire monitor_lock_ without mutator_lock_ for Wait.
       if (owner_ != NULL) {  // Did the owner_ give the lock up?
         monitor_contenders_.Wait(self);  // Still contended so wait.
@@ -249,8 +249,8 @@ void Monitor::Lock(Thread* self) {
         }
       }
     }
-      self->SetMonitorEnterObject(nullptr);
     }
+    self->SetMonitorEnterObject(nullptr);
     monitor_lock_.Lock(self);  // Reacquire locks in order.
     --num_waiters_;
   }
@@ -363,7 +363,7 @@ bool Monitor::Unlock(Thread* self) {
     // We don't own this, so we're not allowed to unlock it.
     // The JNI spec says that we should throw IllegalMonitorStateException
     // in this case.
-    FailedUnlock(obj_, self, owner, this);
+    FailedUnlock(GetObject(), self, owner, this);
     return false;
   }
   return true;
@@ -895,7 +895,7 @@ void Monitor::DescribeWait(std::ostream& os, const Thread* thread) {
     MutexLock mu(self, *thread->GetWaitMutex());
     Monitor* monitor = thread->GetWaitMonitor();
     if (monitor != nullptr) {
-      pretty_object = monitor->obj_;
+      pretty_object = monitor->GetObject();
     }
   } else if (state == kBlocked) {
     wait_message = "  - waiting to lock ";
@@ -1101,12 +1101,13 @@ void MonitorList::SweepMonitorList(IsMarkedCallback* callback, void* arg) {
   MutexLock mu(Thread::Current(), monitor_list_lock_);
   for (auto it = list_.begin(); it != list_.end(); ) {
     Monitor* m = *it;
-    mirror::Object* obj = m->GetObject();
+    // Disable the read barrier in GetObject() as this is called by GC.
+    mirror::Object* obj = m->GetObject<kWithoutReadBarrier>();
     // The object of a monitor can be null if we have deflated it.
     mirror::Object* new_obj = obj != nullptr ? callback(obj, arg) : nullptr;
     if (new_obj == nullptr) {
       VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object "
-                    << m->GetObject();
+                    << obj;
       delete m;
       it = list_.erase(it);
     } else {
diff --git a/runtime/monitor.h b/runtime/monitor.h
index bc1b2ed4eb..7af2d4cf3a 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -27,6 +27,7 @@
 #include "atomic.h"
 #include "base/mutex.h"
 #include "object_callbacks.h"
+#include "read_barrier.h"
 #include "thread_state.h"

 namespace art {
@@ -92,8 +93,9 @@ class Monitor {

   static bool IsValidLockWord(LockWord lock_word);

+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   mirror::Object* GetObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return obj_;
+    return ReadBarrier::BarrierForWeakRoot<mirror::Object, kReadBarrierOption>(obj_);
   }

   void SetObject(mirror::Object* object);
@@ -190,7 +192,9 @@ class Monitor {
   // Owner's recursive lock depth.
   int lock_count_ GUARDED_BY(monitor_lock_);

-  // What object are we part of.
+  // What object are we part of. This is a weak root. Do not access
+  // this directly, use GetObject() to read it so it will be guarded
+  // by a read barrier.
   mirror::Object* obj_;

   // Threads currently waiting on this monitor.
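The monitor.cc and monitor.h hunks above funnel every read of the weak root obj_ through GetObject(), whose template parameter lets the GC sweep path skip the barrier. A simplified, self-contained sketch of that compile-time dispatch follows; Object and this BarrierForWeakRoot are stand-ins for the ART types, and the real barrier in this change is still a pass-through placeholder.

#include <cstdio>

enum ReadBarrierOption { kWithReadBarrier, kWithoutReadBarrier };

struct Object { int id; };

// Stand-in for ReadBarrier::BarrierForWeakRoot(); a real collector would mark
// or forward the reference here, this sketch just passes it through.
template <typename T, ReadBarrierOption kOption>
static T* BarrierForWeakRoot(T* ref) {
  return ref;
}

class Monitor {
 public:
  explicit Monitor(Object* obj) : obj_(obj) {}

  // Default callers get the barrier; the GC sweep opts out with
  // GetObject<kWithoutReadBarrier>(), as SweepMonitorList does above.
  template <ReadBarrierOption kOption = kWithReadBarrier>
  Object* GetObject() const {
    return BarrierForWeakRoot<Object, kOption>(obj_);
  }

 private:
  Object* obj_;  // Weak root: only ever read through GetObject().
};

int main() {
  Object o{7};
  Monitor m(&o);
  std::printf("%d %d\n", m.GetObject()->id, m.GetObject<kWithoutReadBarrier>()->id);
  return 0;
}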
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 69b05f41c4..d9c9b5937d 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -155,6 +155,21 @@ static jstring VMRuntime_vmLibrary(JNIEnv* env, jobject) {
   return env->NewStringUTF(kIsDebugBuild ? "libartd.so" : "libart.so");
 }

+static jstring VMRuntime_vmInstructionSet(JNIEnv* env, jobject) {
+  InstructionSet isa = Runtime::Current()->GetInstructionSet();
+  const char* isa_string = GetInstructionSetString(isa);
+  return env->NewStringUTF(isa_string);
+}
+
+static jboolean VMRuntime_is64Bit(JNIEnv* env, jobject) {
+  bool is64BitMode = (sizeof(void*) == sizeof(uint64_t));
+  return is64BitMode ? JNI_TRUE : JNI_FALSE;
+}
+
+static jboolean VMRuntime_isCheckJniEnabled(JNIEnv* env, jobject) {
+  return Runtime::Current()->GetJavaVM()->check_jni ? JNI_TRUE : JNI_FALSE;
+}
+
 static void VMRuntime_setTargetSdkVersionNative(JNIEnv* env, jobject, jint targetSdkVersion) {
   // This is the target SDK version of the app we're about to run. It is intended that this a place
   // where workarounds can be enabled.
@@ -529,6 +544,9 @@ static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(VMRuntime, trimHeap, "()V"),
   NATIVE_METHOD(VMRuntime, vmVersion, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, vmLibrary, "()Ljava/lang/String;"),
+  NATIVE_METHOD(VMRuntime, vmInstructionSet, "()Ljava/lang/String;"),
+  NATIVE_METHOD(VMRuntime, is64Bit, "!()Z"),
+  NATIVE_METHOD(VMRuntime, isCheckJniEnabled, "!()Z"),
   NATIVE_METHOD(VMRuntime, preloadDexCaches, "()V"),
   NATIVE_METHOD(VMRuntime, registerAppInfo, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V"),
 };
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index 5d90f1a579..e17e60a7ce 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -52,9 +52,15 @@ static jobjectArray DdmVmInternal_getStackTraceById(JNIEnv* env, jclass, jint th
     jobject internal_trace = self->CreateInternalStackTrace<false>(soa);
     trace = Thread::InternalStackTraceToStackTraceElementArray(soa, internal_trace);
   } else {
-    // Suspend thread to build stack trace.
     ThreadList* thread_list = Runtime::Current()->GetThreadList();
     bool timed_out;
+
+    // Check for valid thread
+    if (thin_lock_id == ThreadList::kInvalidThreadId) {
+      return nullptr;
+    }
+
+    // Suspend thread to build stack trace.
     Thread* thread = thread_list->SuspendThreadByThreadId(thin_lock_id, false, &timed_out);
     if (thread != nullptr) {
       {
diff --git a/runtime/oat.cc b/runtime/oat.cc
index cb9334a120..10d335eec1 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@ namespace art {

 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '2', '8', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '2', '9', '\0' };

 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
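For context on the gMethods additions above: ART's NATIVE_METHOD macro expands to a JNINativeMethod entry, and the leading "!" in signatures like "!()Z" is ART's marker for a fast JNI transition, stripped before the signature proper is used. A sketch of how an equivalent table would be registered with plain JNI; the native function and the Java-side class com/example/RuntimeInfo are invented for illustration.

#include <jni.h>
#include <cstdint>

// Hypothetical native mirroring VMRuntime_is64Bit above.
static jboolean RuntimeInfo_is64Bit(JNIEnv*, jobject) {
  return (sizeof(void*) == sizeof(uint64_t)) ? JNI_TRUE : JNI_FALSE;
}

static const JNINativeMethod kMethods[] = {
  {"is64Bit", "()Z", reinterpret_cast<void*>(RuntimeInfo_is64Bit)},
};

// The VM calls this when the library is loaded via System.loadLibrary.
extern "C" JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void* /*reserved*/) {
  JNIEnv* env = nullptr;
  if (vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) != JNI_OK) {
    return JNI_ERR;
  }
  jclass klass = env->FindClass("com/example/RuntimeInfo");
  if (klass == nullptr ||
      env->RegisterNatives(klass, kMethods,
                           sizeof(kMethods) / sizeof(kMethods[0])) != JNI_OK) {
    return JNI_ERR;
  }
  return JNI_VERSION_1_6;
}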
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 4330d275a9..3756435b1c 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -790,7 +790,7 @@ void ParsedOptions::Usage(const char* fmt, ...) {
   UsageMessage(stream, "  -Xprofile-period:integervalue\n");
   UsageMessage(stream, "  -Xprofile-duration:integervalue\n");
   UsageMessage(stream, "  -Xprofile-interval:integervalue\n");
-  UsageMessage(stream, "  -Xprofile-backoff:integervalue\n");
+  UsageMessage(stream, "  -Xprofile-backoff:doublevalue\n");
   UsageMessage(stream, "  -Xcompiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
   UsageMessage(stream, "\n");
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 88e2f8fa61..4302c9ef85 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -43,6 +43,21 @@ inline MirrorType* ReadBarrier::Barrier(
   }
 }

+template <typename MirrorType, ReadBarrierOption kReadBarrierOption>
+inline MirrorType* ReadBarrier::BarrierForWeakRoot(MirrorType* ref) {
+  UNUSED(ref);
+  const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
+  if (with_read_barrier && kUseBakerReadBarrier) {
+    // To be implemented.
+    return ref;
+  } else if (with_read_barrier && kUseBrooksReadBarrier) {
+    // To be implemented.
+    return ref;
+  } else {
+    return ref;
+  }
+}
+
 }  // namespace art

 #endif  // ART_RUNTIME_READ_BARRIER_INL_H_
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index 73c3d43e92..e40e8eaa37 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -37,6 +37,10 @@ class ReadBarrier {
   ALWAYS_INLINE static MirrorType* Barrier(
       mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  ALWAYS_INLINE static MirrorType* BarrierForWeakRoot(MirrorType* ref)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 };

 }  // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index 62fa323212..9a7cb486d8 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -396,11 +396,11 @@ class Thread {
   // Convert a jobject into a Object*
   mirror::Object* DecodeJObject(jobject obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);

-  mirror::Object* GetMonitorEnterObject() const {
+  mirror::Object* GetMonitorEnterObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return tlsPtr_.monitor_enter_object;
   }

-  void SetMonitorEnterObject(mirror::Object* obj) {
+  void SetMonitorEnterObject(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     tlsPtr_.monitor_enter_object = obj;
   }

@@ -1045,9 +1045,6 @@ class Thread {
     // A cached pthread_t for the pthread underlying this Thread*.
     pthread_t pthread_self;

-    // Support for Mutex lock hierarchy bug detection.
-    BaseMutex* held_mutexes[kLockLevelCount];
-
     // If no_thread_suspension_ is > 0, what is causing that assertion.
     const char* last_no_thread_suspension_cause;

@@ -1074,6 +1071,9 @@ class Thread {
     // Thread-local allocation stack data/routines.
     mirror::Object** thread_local_alloc_stack_top;
    mirror::Object** thread_local_alloc_stack_end;
+
+    // Support for Mutex lock hierarchy bug detection.
+    BaseMutex* held_mutexes[kLockLevelCount];
   } tlsPtr_;

   // Guards the 'interrupted_' and 'wait_monitor_' members.
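The SHARED_LOCKS_REQUIRED, GUARDED_BY, and LOCKS_EXCLUDED macros being adjusted above and below are ART wrappers over Clang's thread-safety attributes. A minimal standalone sketch of the underlying mechanism, with Mutex, gLock, gCounter, and ReadCounter invented for illustration; compile with clang++ -Wthread-safety to see the diagnostics (other compilers will just ignore the attributes).

// A tiny annotated mutex; real code would wrap pthread_mutex_t.
class __attribute__((capability("mutex"))) Mutex {
 public:
  void Lock() __attribute__((acquire_capability())) {}
  void Unlock() __attribute__((release_capability())) {}
};

Mutex gLock;
int gCounter __attribute__((guarded_by(gLock))) = 0;

// Analogue of EXCLUSIVE_LOCKS_REQUIRED: the analysis warns at any call
// site that does not already hold gLock.
static int ReadCounter() __attribute__((requires_capability(gLock))) {
  return gCounter;
}

int main() {
  gLock.Lock();
  int value = ReadCounter();  // OK: gLock is held.
  gLock.Unlock();
  // Calling ReadCounter() here would produce: "calling function
  // 'ReadCounter' requires holding mutex 'gLock'".
  return value;
}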
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 8046500c59..388c9b4c76 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -40,8 +40,7 @@ namespace art {

 ThreadList::ThreadList()
-    : allocated_ids_lock_("allocated thread ids lock"),
-      suspend_all_count_(0), debug_suspend_all_count_(0),
+    : suspend_all_count_(0), debug_suspend_all_count_(0),
       thread_exit_cond_("thread exit condition variable", *Locks::thread_list_lock_) {
   CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1)));
 }
@@ -849,7 +848,7 @@ void ThreadList::VerifyRoots(VerifyRootCallback* callback, void* arg) const {
 }

 uint32_t ThreadList::AllocThreadId(Thread* self) {
-  MutexLock mu(self, allocated_ids_lock_);
+  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
   for (size_t i = 0; i < allocated_ids_.size(); ++i) {
     if (!allocated_ids_[i]) {
       allocated_ids_.set(i);
@@ -861,7 +860,7 @@ uint32_t ThreadList::AllocThreadId(Thread* self) {
 }

 void ThreadList::ReleaseThreadId(Thread* self, uint32_t id) {
-  MutexLock mu(self, allocated_ids_lock_);
+  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
   --id;  // Zero is reserved to mean "invalid".
   DCHECK(allocated_ids_[id]) << id;
   allocated_ids_.reset(id);
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index a574340368..d46987a8b8 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -132,7 +132,7 @@ class ThreadList {
  private:
   uint32_t AllocThreadId(Thread* self);
-  void ReleaseThreadId(Thread* self, uint32_t id) LOCKS_EXCLUDED(allocated_ids_lock_);
+  void ReleaseThreadId(Thread* self, uint32_t id) LOCKS_EXCLUDED(Locks::allocated_thread_ids_lock_);

   bool Contains(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_);
   bool Contains(pid_t tid) EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_);

@@ -151,8 +151,7 @@ class ThreadList {
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);

-  mutable Mutex allocated_ids_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  std::bitset<kMaxThreadId> allocated_ids_ GUARDED_BY(allocated_ids_lock_);
+  std::bitset<kMaxThreadId> allocated_ids_ GUARDED_BY(Locks::allocated_thread_ids_lock_);

   // The actual list of all threads.
   std::list<Thread*> list_ GUARDED_BY(Locks::thread_list_lock_);
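The AllocThreadId/ReleaseThreadId hunks above move the id bitset under the new global Locks::allocated_thread_ids_lock_. A self-contained sketch of the same allocation scheme; a plain std::mutex stands in for the global ART lock, and kMaxThreadId is an invented bound (ART derives its limit from the thin-lock owner bit field).

#include <bitset>
#include <cstdint>
#include <cstdio>
#include <mutex>

static constexpr size_t kMaxThreadId = 1 << 16;

class ThreadIdAllocator {
 public:
  // Returns the lowest free id, 1-based; 0 is reserved to mean "invalid".
  uint32_t Alloc() {
    std::lock_guard<std::mutex> mu(lock_);
    for (size_t i = 0; i < allocated_ids_.size(); ++i) {
      if (!allocated_ids_[i]) {
        allocated_ids_.set(i);
        return static_cast<uint32_t>(i) + 1;
      }
    }
    return 0;  // Exhausted.
  }

  void Release(uint32_t id) {
    std::lock_guard<std::mutex> mu(lock_);
    --id;  // Stored ids are offset by one because zero means "invalid".
    allocated_ids_.reset(id);
  }

 private:
  std::mutex lock_;
  std::bitset<kMaxThreadId> allocated_ids_;
};

int main() {
  ThreadIdAllocator ids;
  uint32_t a = ids.Alloc();  // 1
  uint32_t b = ids.Alloc();  // 2
  ids.Release(a);
  std::printf("%u %u %u\n", a, b, ids.Alloc());  // Prints: 1 2 1
  return 0;
}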