152 files changed, 4757 insertions, 2378 deletions
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk index 68f5ed703d..0d3b4c9daa 100644 --- a/build/Android.common_build.mk +++ b/build/Android.common_build.mk @@ -138,7 +138,7 @@ art_gcc_cflags := -Wunused-but-set-parameter # Suggest final: Have to move to a more recent GCC. # -Wsuggest-final-types - +ART_TARGET_CLANG_CFLAGS := $(art_clang_cflags) ifeq ($(ART_HOST_CLANG),true) # Bug: 15446488. We don't omit the frame pointer to work around # clang/libunwind bugs that cause SEGVs in run-test-004-ThreadStress. @@ -146,10 +146,14 @@ ifeq ($(ART_HOST_CLANG),true) else ART_HOST_CFLAGS += $(art_gcc_cflags) endif -ifeq ($(ART_TARGET_CLANG),true) - ART_TARGET_CFLAGS += $(art_clang_cflags) -else +ifneq ($(ART_TARGET_CLANG),true) ART_TARGET_CFLAGS += $(art_gcc_cflags) +else + # TODO: if we ever want to support GCC/Clang mix for multi-target products, this needs to be + # split up. + ifeq ($(ART_TARGET_CLANG_$(TARGET_ARCH)),false) + ART_TARGET_CFLAGS += $(art_gcc_cflags) + endif endif # Clear local variables now their use has ended. @@ -294,11 +298,9 @@ define set-target-local-cflags-vars LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS) endif - # TODO: Also set when ART_TARGET_CLANG_$(arch)!=false and ART_TARGET_CLANG==true + LOCAL_CLANG_CFLAGS := $(ART_TARGET_CLANG_CFLAGS) $(foreach arch,$(ART_SUPPORTED_ARCH), - ifeq ($$(ART_TARGET_CLANG_$(arch)),true) - LOCAL_CFLAGS_$(arch) += $$(ART_TARGET_CLANG_CFLAGS_$(arch)) - endif) + LOCAL_CLANG_CFLAGS_$(arch) += $$(ART_TARGET_CLANG_CFLAGS_$(arch))) # Clear locally used variables. art_target_cflags_ndebug_or_debug := diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk index 0ae42dd383..da50d53dbf 100644 --- a/build/Android.common_test.mk +++ b/build/Android.common_test.mk @@ -25,21 +25,7 @@ ART_TARGET_CFLAGS += -DART_TARGET_NATIVETEST_DIR=${ART_TARGET_NATIVETEST_DIR} # List of known broken tests that we won't attempt to execute. The test name must be the full # rule name such as test-art-host-oat-optimizing-HelloWorld64. -ART_TEST_KNOWN_BROKEN := \ - test-art-target-run-test-gcstress-optimizing-prebuild-004-SignalTest32 \ - test-art-target-run-test-gcstress-optimizing-norelocate-004-SignalTest32 \ - test-art-target-run-test-gcstress-default-prebuild-004-SignalTest32 \ - test-art-target-run-test-gcstress-default-norelocate-004-SignalTest32 \ - test-art-target-run-test-gcstress-optimizing-relocate-004-SignalTest32 \ - test-art-target-run-test-gcstress-default-relocate-004-SignalTest32 \ - test-art-target-run-test-gcstress-optimizing-no-prebuild-004-SignalTest32 \ - test-art-target-run-test-gcstress-default-no-prebuild-004-SignalTest32 \ - test-art-host-run-test-gcstress-default-prebuild-114-ParallelGC32 \ - test-art-host-run-test-gcstress-interpreter-prebuild-114-ParallelGC32 \ - test-art-host-run-test-gcstress-optimizing-prebuild-114-ParallelGC32 \ - test-art-host-run-test-gcstress-default-prebuild-114-ParallelGC64 \ - test-art-host-run-test-gcstress-interpreter-prebuild-114-ParallelGC64 \ - test-art-host-run-test-gcstress-optimizing-prebuild-114-ParallelGC64 +ART_TEST_KNOWN_BROKEN := # Failing valgrind tests. # Note: *all* 64b tests involving the runtime do not work currently. b/15170219. 
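The Android.common_build.mk hunk above drops the explicit per-architecture ifeq guard and instead routes the Clang-only flags through LOCAL_CLANG_CFLAGS and LOCAL_CLANG_CFLAGS_$(arch), which the platform build applies only when the module is actually compiled with Clang. A minimal sketch of that pattern follows; the variable values are illustrative, not taken from the tree:

    # Applied to every Clang compile of the module.
    LOCAL_CLANG_CFLAGS := $(ART_TARGET_CLANG_CFLAGS)
    # Applied only when the module is built with Clang *and* for the named arch,
    # so no ifeq ($(ART_TARGET_CLANG_$(arch)),true) wrapper is needed anymore.
    LOCAL_CLANG_CFLAGS_arm   += $(ART_TARGET_CLANG_CFLAGS_arm)
    LOCAL_CLANG_CFLAGS_arm64 += $(ART_TARGET_CLANG_CFLAGS_arm64)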
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 4c19ba0b4c..8180ce8638 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -66,8 +66,16 @@ ART_GTEST_elf_writer_test_TARGET_DEPS := $(TARGET_CORE_IMAGE_default_no-pic_64) ART_GTEST_proxy_test_HOST_DEPS := $(HOST_CORE_IMAGE_default_no-pic_64) $(HOST_CORE_IMAGE_default_no-pic_32) # The imgdiag test has dependencies on core.oat since it needs to load it during the test. -ART_GTEST_imgdiag_test_HOST_DEPS := $(HOST_CORE_IMAGE_default_no-pic_64) $(HOST_CORE_IMAGE_default_no-pic_32) -ART_GTEST_imgdiag_test_TARGET_DEPS := $(TARGET_CORE_IMAGE_default_no-pic_64) $(TARGET_CORE_IMAGE_default_no-pic_32) +# For the host, also add the installed tool (in the base size, that should suffice). For the +# target, just the module is fine, the sync will happen late enough. +ART_GTEST_imgdiag_test_HOST_DEPS := \ + $(HOST_CORE_IMAGE_default_no-pic_64) \ + $(HOST_CORE_IMAGE_default_no-pic_32) \ + $(HOST_OUT_EXECUTABLES)/imgdiagd +ART_GTEST_imgdiag_test_TARGET_DEPS := \ + $(TARGET_CORE_IMAGE_default_no-pic_64) \ + $(TARGET_CORE_IMAGE_default_no-pic_32) \ + imgdiagd # The path for which all the source files are relative, not actually the current directory. LOCAL_PATH := art @@ -115,6 +123,7 @@ RUNTIME_GTEST_COMMON_SRC_FILES := \ runtime/gc/space/rosalloc_space_static_test.cc \ runtime/gc/space/rosalloc_space_random_test.cc \ runtime/gc/space/large_object_space_test.cc \ + runtime/gc/task_processor_test.cc \ runtime/gtest_test.cc \ runtime/handle_scope_test.cc \ runtime/indenter_test.cc \ @@ -173,6 +182,7 @@ COMPILER_GTEST_COMMON_SRC_FILES := \ compiler/output_stream_test.cc \ compiler/utils/arena_allocator_test.cc \ compiler/utils/dedupe_set_test.cc \ + compiler/utils/swap_space_test.cc \ compiler/utils/arm/managed_register_arm_test.cc \ compiler/utils/arm64/managed_register_arm64_test.cc \ compiler/utils/x86/managed_register_x86_test.cc \ @@ -200,14 +210,14 @@ include $(CLEAR_VARS) LOCAL_MODULE := libart-gtest LOCAL_MODULE_TAGS := optional LOCAL_CPP_EXTENSION := cc -LOCAL_CFLAGS := $(ART_TARGET_CFLAGS) LOCAL_SRC_FILES := runtime/common_runtime_test.cc compiler/common_compiler_test.cc LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/compiler LOCAL_SHARED_LIBRARIES := libartd libartd-compiler libdl LOCAL_STATIC_LIBRARIES += libgtest -LOCAL_CLANG := $(ART_TARGET_CLANG) LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.gtest.mk +$(eval $(call set-target-local-clang-vars)) +$(eval $(call set-target-local-cflags-vars,debug)) include external/libcxx/libcxx.mk include $(BUILD_SHARED_LIBRARY) diff --git a/build/Android.oat.mk b/build/Android.oat.mk index d4fd5190a0..8d49565478 100644 --- a/build/Android.oat.mk +++ b/build/Android.oat.mk @@ -52,6 +52,9 @@ define create-core-oat-host-rules core_dex2oat_dependency := $(DEX2OAT) endif + ifeq ($(1),default) + core_compile_options += --compiler-backend=Quick + endif ifeq ($(1),optimizing) core_compile_options += --compiler-backend=Optimizing core_dex2oat_dependency := $(DEX2OAT) @@ -163,6 +166,9 @@ define create-core-oat-target-rules core_dex2oat_dependency := $(DEX2OAT) endif + ifeq ($(1),default) + core_compile_options += --compiler-backend=Quick + endif ifeq ($(1),optimizing) core_compile_options += --compiler-backend=Optimizing core_dex2oat_dependency := $(DEX2OAT) diff --git a/cmdline/cmdline.h b/cmdline/cmdline.h index c15594a80e..30012d0978 100644 --- a/cmdline/cmdline.h +++ b/cmdline/cmdline.h @@ -221,18 +221,10 @@ 
struct CmdlineArgs { virtual ~CmdlineArgs() {} - protected: - virtual ParseStatus ParseCustom(const StringPiece& option, std::string* error_msg) { - UNUSED(option); - UNUSED(error_msg); - - return kParseUnknownArgument; - } - - virtual ParseStatus ParseChecks(std::string* error_msg) { + bool ParseCheckBootImage(std::string* error_msg) { if (boot_image_location_ == nullptr) { *error_msg = "--boot-image must be specified"; - return kParseError; + return false; } DBG_LOG << "boot image location: " << boot_image_location_; @@ -243,7 +235,7 @@ struct CmdlineArgs { size_t file_name_idx = boot_image_location.rfind("/"); if (file_name_idx == std::string::npos) { // Prevent a InsertIsaDirectory check failure. *error_msg = "Boot image location must have a / in it"; - return kParseError; + return false; } // Don't let image locations with the 'arch' in it through, since it's not a location. @@ -263,7 +255,7 @@ struct CmdlineArgs { if (GetInstructionSetFromString(parent_dir_name.c_str()) != kNone) { *error_msg = "Do not specify the architecture as part of the boot image location"; - return kParseError; + return false; } } @@ -272,19 +264,28 @@ struct CmdlineArgs { if (!LocationToFilename(boot_image_location, instruction_set_, &file_name)) { *error_msg = StringPrintf("No corresponding file for location '%s' exists", file_name.c_str()); - return kParseError; + return false; } DBG_LOG << "boot_image_filename does exist: " << file_name; } - return kParseOk; + return true; } - private: void PrintUsage() { fprintf(stderr, "%s", GetUsage().c_str()); } + + protected: + virtual ParseStatus ParseCustom(const StringPiece& option ATTRIBUTE_UNUSED, + std::string* error_msg ATTRIBUTE_UNUSED) { + return kParseUnknownArgument; + } + + virtual ParseStatus ParseChecks(std::string* error_msg ATTRIBUTE_UNUSED) { + return kParseOk; + } }; template <typename Args = CmdlineArgs> @@ -300,14 +301,21 @@ struct CmdlineMain { return EXIT_FAILURE; } - std::unique_ptr<Runtime> runtime = CreateRuntime(args.get()); - if (runtime == nullptr) { - return EXIT_FAILURE; - } - bool needs_runtime = NeedsRuntime(); + std::unique_ptr<Runtime> runtime; + if (needs_runtime) { + std::string error_msg; + if (!args_->ParseCheckBootImage(&error_msg)) { + fprintf(stderr, "%s\n", error_msg.c_str()); + args_->PrintUsage(); + return EXIT_FAILURE; + } + runtime.reset(CreateRuntime(args.get())); + if (runtime == nullptr) { + return EXIT_FAILURE; + } if (!ExecuteWithRuntime(runtime.get())) { return EXIT_FAILURE; } @@ -358,11 +366,10 @@ struct CmdlineMain { Args* args_ = nullptr; private: - std::unique_ptr<Runtime> CreateRuntime(CmdlineArgs* args) { + Runtime* CreateRuntime(CmdlineArgs* args) { CHECK(args != nullptr); - return std::unique_ptr<Runtime>(StartRuntime(args->boot_image_location_, - args->instruction_set_)); + return StartRuntime(args->boot_image_location_, args->instruction_set_); } }; } // namespace art diff --git a/compiler/Android.mk b/compiler/Android.mk index 8bcc2f99ec..db338f0538 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -127,6 +127,7 @@ LIBART_COMPILER_SRC_FILES := \ utils/x86_64/assembler_x86_64.cc \ utils/x86_64/managed_register_x86_64.cc \ utils/scoped_arena_allocator.cc \ + utils/swap_space.cc \ buffered_output_stream.cc \ compiler.cc \ elf_writer.cc \ diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index 059a9eea50..7df71f5b8a 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -48,18 +48,22 @@ void 
CommonCompilerTest::MakeExecutable(mirror::ArtMethod* method) { method->GetDexMethodIndex())); } if (compiled_method != nullptr) { - const std::vector<uint8_t>* code = compiled_method->GetQuickCode(); + const SwapVector<uint8_t>* code = compiled_method->GetQuickCode(); uint32_t code_size = code->size(); CHECK_NE(0u, code_size); - const std::vector<uint8_t>& vmap_table = compiled_method->GetVmapTable(); + const SwapVector<uint8_t>& vmap_table = compiled_method->GetVmapTable(); uint32_t vmap_table_offset = vmap_table.empty() ? 0u : sizeof(OatQuickMethodHeader) + vmap_table.size(); - const std::vector<uint8_t>& mapping_table = *compiled_method->GetMappingTable(); - uint32_t mapping_table_offset = mapping_table.empty() ? 0u - : sizeof(OatQuickMethodHeader) + vmap_table.size() + mapping_table.size(); - const std::vector<uint8_t>& gc_map = *compiled_method->GetGcMap(); - uint32_t gc_map_offset = gc_map.empty() ? 0u - : sizeof(OatQuickMethodHeader) + vmap_table.size() + mapping_table.size() + gc_map.size(); + const SwapVector<uint8_t>* mapping_table = compiled_method->GetMappingTable(); + bool mapping_table_used = mapping_table != nullptr && !mapping_table->empty(); + size_t mapping_table_size = mapping_table_used ? mapping_table->size() : 0U; + uint32_t mapping_table_offset = !mapping_table_used ? 0u + : sizeof(OatQuickMethodHeader) + vmap_table.size() + mapping_table_size; + const SwapVector<uint8_t>* gc_map = compiled_method->GetGcMap(); + bool gc_map_used = gc_map != nullptr && !gc_map->empty(); + size_t gc_map_size = gc_map_used ? gc_map->size() : 0U; + uint32_t gc_map_offset = !gc_map_used ? 0u + : sizeof(OatQuickMethodHeader) + vmap_table.size() + mapping_table_size + gc_map_size; OatQuickMethodHeader method_header(mapping_table_offset, vmap_table_offset, gc_map_offset, compiled_method->GetFrameSizeInBytes(), compiled_method->GetCoreSpillMask(), @@ -67,16 +71,20 @@ void CommonCompilerTest::MakeExecutable(mirror::ArtMethod* method) { header_code_and_maps_chunks_.push_back(std::vector<uint8_t>()); std::vector<uint8_t>* chunk = &header_code_and_maps_chunks_.back(); - size_t size = sizeof(method_header) + code_size + vmap_table.size() + mapping_table.size() + - gc_map.size(); + size_t size = sizeof(method_header) + code_size + vmap_table.size() + mapping_table_size + + gc_map_size; size_t code_offset = compiled_method->AlignCode(size - code_size); size_t padding = code_offset - (size - code_size); chunk->reserve(padding + size); chunk->resize(sizeof(method_header)); memcpy(&(*chunk)[0], &method_header, sizeof(method_header)); chunk->insert(chunk->begin(), vmap_table.begin(), vmap_table.end()); - chunk->insert(chunk->begin(), mapping_table.begin(), mapping_table.end()); - chunk->insert(chunk->begin(), gc_map.begin(), gc_map.end()); + if (mapping_table_used) { + chunk->insert(chunk->begin(), mapping_table->begin(), mapping_table->end()); + } + if (gc_map_used) { + chunk->insert(chunk->begin(), gc_map->begin(), gc_map->end()); + } chunk->insert(chunk->begin(), padding, 0); chunk->insert(chunk->end(), code->begin(), code->end()); CHECK_EQ(padding + size, chunk->size()); @@ -156,7 +164,7 @@ void CommonCompilerTest::SetUp() { compiler_kind, instruction_set, instruction_set_features_.get(), true, new std::set<std::string>, nullptr, - 2, true, true, timer_.get(), "")); + 2, true, true, timer_.get(), -1, "")); } // We typically don't generate an image in unit tests, disable this optimization by default. 
compiler_driver_->SetSupportBootImageFixup(false); diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index 060af723a7..234e8b96f6 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -20,13 +20,13 @@ namespace art { CompiledCode::CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set, - const std::vector<uint8_t>& quick_code) + const ArrayRef<const uint8_t>& quick_code) : compiler_driver_(compiler_driver), instruction_set_(instruction_set), quick_code_(nullptr) { SetCode(&quick_code); } -void CompiledCode::SetCode(const std::vector<uint8_t>* quick_code) { +void CompiledCode::SetCode(const ArrayRef<const uint8_t>* quick_code) { if (quick_code != nullptr) { CHECK(!quick_code->empty()); quick_code_ = compiler_driver_->DeduplicateCode(*quick_code); @@ -108,61 +108,88 @@ void CompiledCode::AddOatdataOffsetToCompliledCodeOffset(uint32_t offset) { CompiledMethod::CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set, - const std::vector<uint8_t>& quick_code, + const ArrayRef<const uint8_t>& quick_code, const size_t frame_size_in_bytes, const uint32_t core_spill_mask, const uint32_t fp_spill_mask, - SrcMap* src_mapping_table, - const std::vector<uint8_t>& mapping_table, - const std::vector<uint8_t>& vmap_table, - const std::vector<uint8_t>& native_gc_map, - const std::vector<uint8_t>* cfi_info, + DefaultSrcMap* src_mapping_table, + const ArrayRef<const uint8_t>& mapping_table, + const ArrayRef<const uint8_t>& vmap_table, + const ArrayRef<const uint8_t>& native_gc_map, + const ArrayRef<const uint8_t>& cfi_info, const ArrayRef<LinkerPatch>& patches) : CompiledCode(driver, instruction_set, quick_code), frame_size_in_bytes_(frame_size_in_bytes), core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask), - src_mapping_table_(driver->DeduplicateSrcMappingTable(src_mapping_table->Arrange())), - mapping_table_(driver->DeduplicateMappingTable(mapping_table)), + src_mapping_table_(src_mapping_table == nullptr ? + driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>()) : + driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>(src_mapping_table->Arrange()))), + mapping_table_(mapping_table.data() == nullptr ? + nullptr : driver->DeduplicateMappingTable(mapping_table)), vmap_table_(driver->DeduplicateVMapTable(vmap_table)), - gc_map_(driver->DeduplicateGCMap(native_gc_map)), - cfi_info_(driver->DeduplicateCFIInfo(cfi_info)), - patches_(patches.begin(), patches.end()) { + gc_map_(native_gc_map.data() == nullptr ? nullptr : driver->DeduplicateGCMap(native_gc_map)), + cfi_info_(cfi_info.data() == nullptr ? 
nullptr : driver->DeduplicateCFIInfo(cfi_info)), + patches_(patches.begin(), patches.end(), driver->GetSwapSpaceAllocator()) { } -CompiledMethod::CompiledMethod(CompilerDriver* driver, - InstructionSet instruction_set, - const std::vector<uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const std::vector<uint8_t>& stack_map) - : CompiledCode(driver, instruction_set, quick_code), - frame_size_in_bytes_(frame_size_in_bytes), - core_spill_mask_(core_spill_mask), - fp_spill_mask_(fp_spill_mask), - src_mapping_table_(driver->DeduplicateSrcMappingTable(SrcMap())), - mapping_table_(nullptr), - vmap_table_(driver->DeduplicateVMapTable(stack_map)), - gc_map_(nullptr), - cfi_info_(nullptr), - patches_() { +CompiledMethod* CompiledMethod::SwapAllocCompiledMethod( + CompilerDriver* driver, + InstructionSet instruction_set, + const ArrayRef<const uint8_t>& quick_code, + const size_t frame_size_in_bytes, + const uint32_t core_spill_mask, + const uint32_t fp_spill_mask, + DefaultSrcMap* src_mapping_table, + const ArrayRef<const uint8_t>& mapping_table, + const ArrayRef<const uint8_t>& vmap_table, + const ArrayRef<const uint8_t>& native_gc_map, + const ArrayRef<const uint8_t>& cfi_info, + const ArrayRef<LinkerPatch>& patches) { + SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator()); + CompiledMethod* ret = alloc.allocate(1); + alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask, + fp_spill_mask, src_mapping_table, mapping_table, vmap_table, native_gc_map, + cfi_info, patches); + return ret; } -CompiledMethod::CompiledMethod(CompilerDriver* driver, - InstructionSet instruction_set, - const std::vector<uint8_t>& code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const std::vector<uint8_t>* cfi_info) - : CompiledCode(driver, instruction_set, code), - frame_size_in_bytes_(frame_size_in_bytes), - core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask), - src_mapping_table_(driver->DeduplicateSrcMappingTable(SrcMap())), - mapping_table_(driver->DeduplicateMappingTable(std::vector<uint8_t>())), - vmap_table_(driver->DeduplicateVMapTable(std::vector<uint8_t>())), - gc_map_(driver->DeduplicateGCMap(std::vector<uint8_t>())), - cfi_info_(driver->DeduplicateCFIInfo(cfi_info)), - patches_() { +CompiledMethod* CompiledMethod::SwapAllocCompiledMethodStackMap( + CompilerDriver* driver, + InstructionSet instruction_set, + const ArrayRef<const uint8_t>& quick_code, + const size_t frame_size_in_bytes, + const uint32_t core_spill_mask, + const uint32_t fp_spill_mask, + const ArrayRef<const uint8_t>& stack_map) { + SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator()); + CompiledMethod* ret = alloc.allocate(1); + alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask, + fp_spill_mask, nullptr, ArrayRef<const uint8_t>(), stack_map, + ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), ArrayRef<LinkerPatch>()); + return ret; +} + +CompiledMethod* CompiledMethod::SwapAllocCompiledMethodCFI( + CompilerDriver* driver, + InstructionSet instruction_set, + const ArrayRef<const uint8_t>& quick_code, + const size_t frame_size_in_bytes, + const uint32_t core_spill_mask, + const uint32_t fp_spill_mask, + const ArrayRef<const uint8_t>& cfi_info) { + SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator()); + CompiledMethod* ret = alloc.allocate(1); + alloc.construct(ret, 
driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask, + fp_spill_mask, nullptr, ArrayRef<const uint8_t>(), + ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), + cfi_info, ArrayRef<LinkerPatch>()); + return ret; +} + + +void CompiledMethod::ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m) { + SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator()); + alloc.destroy(m); + alloc.deallocate(m, 1); } } // namespace art diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index d93db03806..6013507ac4 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -25,6 +25,7 @@ #include "method_reference.h" #include "utils.h" #include "utils/array_ref.h" +#include "utils/swap_space.h" namespace llvm { class Function; @@ -38,17 +39,17 @@ class CompiledCode { public: // For Quick to supply an code blob CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set, - const std::vector<uint8_t>& quick_code); + const ArrayRef<const uint8_t>& quick_code); InstructionSet GetInstructionSet() const { return instruction_set_; } - const std::vector<uint8_t>* GetQuickCode() const { + const SwapVector<uint8_t>* GetQuickCode() const { return quick_code_; } - void SetCode(const std::vector<uint8_t>* quick_code); + void SetCode(const ArrayRef<const uint8_t>* quick_code); bool operator==(const CompiledCode& rhs) const; @@ -78,7 +79,7 @@ class CompiledCode { const InstructionSet instruction_set_; // Used to store the PIC code for Quick. - std::vector<uint8_t>* quick_code_; + SwapVector<uint8_t>* quick_code_; // There are offsets from the oatdata symbol to where the offset to // the compiled method will be found. These are computed by the @@ -109,8 +110,23 @@ class SrcMapElem { } }; -class SrcMap FINAL : public std::vector<SrcMapElem> { +template <class Allocator> +class SrcMap FINAL : public std::vector<SrcMapElem, Allocator> { public: + using std::vector<SrcMapElem, Allocator>::begin; + using typename std::vector<SrcMapElem, Allocator>::const_iterator; + using std::vector<SrcMapElem, Allocator>::empty; + using std::vector<SrcMapElem, Allocator>::end; + using std::vector<SrcMapElem, Allocator>::resize; + using std::vector<SrcMapElem, Allocator>::shrink_to_fit; + using std::vector<SrcMapElem, Allocator>::size; + + explicit SrcMap() {} + + template <class InputIt> + SrcMap(InputIt first, InputIt last, const Allocator& alloc) + : std::vector<SrcMapElem, Allocator>(first, last, alloc) {} + void SortByFrom() { std::sort(begin(), end(), [] (const SrcMapElem& lhs, const SrcMapElem& rhs) -> bool { return lhs.from_ < rhs.from_; @@ -158,6 +174,10 @@ class SrcMap FINAL : public std::vector<SrcMapElem> { } }; +using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>; +using SwapSrcMap = SrcMap<SwapAllocator<SrcMapElem>>; + + enum LinkerPatchType { kLinkerPatchMethod, kLinkerPatchCall, @@ -255,40 +275,57 @@ inline bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs) { class CompiledMethod FINAL : public CompiledCode { public: - // Constructs a CompiledMethod for Quick. + // Constructs a CompiledMethod. + // Note: Consider using the static allocation methods below that will allocate the CompiledMethod + // in the swap space. 
CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set, - const std::vector<uint8_t>& quick_code, + const ArrayRef<const uint8_t>& quick_code, const size_t frame_size_in_bytes, const uint32_t core_spill_mask, const uint32_t fp_spill_mask, - SrcMap* src_mapping_table, - const std::vector<uint8_t>& mapping_table, - const std::vector<uint8_t>& vmap_table, - const std::vector<uint8_t>& native_gc_map, - const std::vector<uint8_t>* cfi_info, + DefaultSrcMap* src_mapping_table, + const ArrayRef<const uint8_t>& mapping_table, + const ArrayRef<const uint8_t>& vmap_table, + const ArrayRef<const uint8_t>& native_gc_map, + const ArrayRef<const uint8_t>& cfi_info, const ArrayRef<LinkerPatch>& patches = ArrayRef<LinkerPatch>()); - // Constructs a CompiledMethod for Optimizing. - CompiledMethod(CompilerDriver* driver, - InstructionSet instruction_set, - const std::vector<uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const std::vector<uint8_t>& vmap_table); - - // Constructs a CompiledMethod for the QuickJniCompiler. - CompiledMethod(CompilerDriver* driver, - InstructionSet instruction_set, - const std::vector<uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const std::vector<uint8_t>* cfi_info); - ~CompiledMethod() {} + static CompiledMethod* SwapAllocCompiledMethod( + CompilerDriver* driver, + InstructionSet instruction_set, + const ArrayRef<const uint8_t>& quick_code, + const size_t frame_size_in_bytes, + const uint32_t core_spill_mask, + const uint32_t fp_spill_mask, + DefaultSrcMap* src_mapping_table, + const ArrayRef<const uint8_t>& mapping_table, + const ArrayRef<const uint8_t>& vmap_table, + const ArrayRef<const uint8_t>& native_gc_map, + const ArrayRef<const uint8_t>& cfi_info, + const ArrayRef<LinkerPatch>& patches = ArrayRef<LinkerPatch>()); + + static CompiledMethod* SwapAllocCompiledMethodStackMap( + CompilerDriver* driver, + InstructionSet instruction_set, + const ArrayRef<const uint8_t>& quick_code, + const size_t frame_size_in_bytes, + const uint32_t core_spill_mask, + const uint32_t fp_spill_mask, + const ArrayRef<const uint8_t>& stack_map); + + static CompiledMethod* SwapAllocCompiledMethodCFI(CompilerDriver* driver, + InstructionSet instruction_set, + const ArrayRef<const uint8_t>& quick_code, + const size_t frame_size_in_bytes, + const uint32_t core_spill_mask, + const uint32_t fp_spill_mask, + const ArrayRef<const uint8_t>& cfi_info); + + static void ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m); + size_t GetFrameSizeInBytes() const { return frame_size_in_bytes_; } @@ -301,29 +338,29 @@ class CompiledMethod FINAL : public CompiledCode { return fp_spill_mask_; } - const SrcMap& GetSrcMappingTable() const { + const SwapSrcMap& GetSrcMappingTable() const { DCHECK(src_mapping_table_ != nullptr); return *src_mapping_table_; } - std::vector<uint8_t> const* GetMappingTable() const { + SwapVector<uint8_t> const* GetMappingTable() const { return mapping_table_; } - const std::vector<uint8_t>& GetVmapTable() const { + const SwapVector<uint8_t>& GetVmapTable() const { DCHECK(vmap_table_ != nullptr); return *vmap_table_; } - std::vector<uint8_t> const* GetGcMap() const { + SwapVector<uint8_t> const* GetGcMap() const { return gc_map_; } - const std::vector<uint8_t>* GetCFIInfo() const { + const SwapVector<uint8_t>* GetCFIInfo() const { return cfi_info_; } - const std::vector<LinkerPatch>& 
GetPatches() const { + const SwapVector<LinkerPatch>& GetPatches() const { return patches_; } @@ -335,19 +372,19 @@ class CompiledMethod FINAL : public CompiledCode { // For quick code, a bit mask describing spilled FPR callee-save registers. const uint32_t fp_spill_mask_; // For quick code, a set of pairs (PC, Line) mapping from native PC offset to Java line - SrcMap* src_mapping_table_; + SwapSrcMap* src_mapping_table_; // For quick code, a uleb128 encoded map from native PC offset to dex PC aswell as dex PC to // native PC offset. Size prefixed. - std::vector<uint8_t>* mapping_table_; + SwapVector<uint8_t>* mapping_table_; // For quick code, a uleb128 encoded map from GPR/FPR register to dex register. Size prefixed. - std::vector<uint8_t>* vmap_table_; + SwapVector<uint8_t>* vmap_table_; // For quick code, a map keyed by native PC indices to bitmaps describing what dalvik registers // are live. - std::vector<uint8_t>* gc_map_; + SwapVector<uint8_t>* gc_map_; // For quick code, a FDE entry for the debug_frame section. - std::vector<uint8_t>* cfi_info_; + SwapVector<uint8_t>* cfi_info_; // For quick code, linker patches needed by the method. - std::vector<LinkerPatch> patches_; + SwapVector<LinkerPatch> patches_; }; } // namespace art diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc index 6a610ab201..e5358139d8 100644 --- a/compiler/dex/bb_optimizations.cc +++ b/compiler/dex/bb_optimizations.cc @@ -51,20 +51,4 @@ bool BBCombine::Worker(PassDataHolder* data) const { return false; } -/* - * BasicBlock Optimization pass implementation start. - */ -void BBOptimizations::Start(PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; - DCHECK(c_unit != nullptr); - /* - * This pass has a different ordering depEnding on the suppress exception, - * so do the pass here for now: - * - Later, the Start should just change the ordering and we can move the extended - * creation into the pass driver's main job with a new iterator - */ - c_unit->mir_graph->BasicBlockOptimization(); -} - } // namespace art diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h index 0407e323cb..b07a415d4a 100644 --- a/compiler/dex/bb_optimizations.h +++ b/compiler/dex/bb_optimizations.h @@ -284,7 +284,8 @@ class BBCombine : public PassME { */ class BBOptimizations : public PassME { public: - BBOptimizations() : PassME("BBOptimizations", kNoNodes, "5_post_bbo_cfg") { + BBOptimizations() + : PassME("BBOptimizations", kNoNodes, kOptimizationBasicBlockChange, "5_post_bbo_cfg") { } bool Gate(const PassDataHolder* data) const { @@ -294,7 +295,28 @@ class BBOptimizations : public PassME { return ((c_unit->disable_opt & (1 << kBBOpt)) == 0); } - void Start(PassDataHolder* data) const; + void Start(PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph->BasicBlockOptimizationStart(); + + /* + * This pass has a different ordering depending on the suppress exception, + * so do the pass here for now: + * - Later, the Start should just change the ordering and we can move the extended + * creation into the pass driver's main job with a new iterator + */ + c_unit->mir_graph->BasicBlockOptimization(); + } + + void End(PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + 
c_unit->mir_graph->BasicBlockOptimizationEnd(); + down_cast<PassMEDataHolder*>(data)->dirty = !c_unit->mir_graph->DfsOrdersUpToDate(); + } }; /** diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 7ff06a04cb..7edb490176 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -555,7 +555,7 @@ std::ostream& operator<<(std::ostream& os, const DividePattern& pattern); * The current recipe is as follows: * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store. * -# Use AnyAny barrier after volatile store. (StoreLoad is as expensive.) - * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrierafter each volatile load. + * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load. * -# Use StoreStore barrier after all stores but before return from any constructor whose * class has final fields. * -# Use NTStoreStore to order non-temporal stores with respect to all later diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc index 7e3b4d8adf..18e346968a 100644 --- a/compiler/dex/global_value_numbering_test.cc +++ b/compiler/dex/global_value_numbering_test.cc @@ -215,7 +215,6 @@ class GlobalValueNumberingTest : public testing::Test { bb->data_flow_info->live_in_v = live_in_v_; } } - cu_.mir_graph->num_blocks_ = count; ASSERT_EQ(count, cu_.mir_graph->block_list_.size()); cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1]; ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type); diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc index 7b53b14909..0f0846c74c 100644 --- a/compiler/dex/mir_analysis.cc +++ b/compiler/dex/mir_analysis.cc @@ -1151,7 +1151,7 @@ bool MIRGraph::SkipCompilation(std::string* skip_message) { skip_compilation = true; *skip_message = "Huge method: " + std::to_string(GetNumDalvikInsns()); // If we're got a huge number of basic blocks, don't bother with further analysis. 
- if (static_cast<size_t>(num_blocks_) > (compiler_options.GetHugeMethodThreshold() / 2)) { + if (static_cast<size_t>(GetNumBlocks()) > (compiler_options.GetHugeMethodThreshold() / 2)) { return true; } } else if (compiler_options.IsLargeMethod(GetNumDalvikInsns()) && diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 71ad635ac4..abd34820ab 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -18,6 +18,7 @@ #include <inttypes.h> #include <queue> +#include <unistd.h> #include "base/bit_vector-inl.h" #include "base/stl_util.h" @@ -91,6 +92,9 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) num_reachable_blocks_(0), max_num_reachable_blocks_(0), dfs_orders_up_to_date_(false), + domination_up_to_date_(false), + mir_ssa_rep_up_to_date_(false), + topological_order_up_to_date_(false), dfs_order_(arena->Adapter(kArenaAllocDfsPreOrder)), dfs_post_order_(arena->Adapter(kArenaAllocDfsPostOrder)), dom_post_order_traversal_(arena->Adapter(kArenaAllocDomPostOrder)), @@ -105,7 +109,6 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) try_block_addr_(NULL), entry_block_(NULL), exit_block_(NULL), - num_blocks_(0), current_code_item_(NULL), dex_pc_to_block_map_(arena->Adapter()), m_units_(arena->Adapter()), @@ -691,7 +694,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ if (current_method_ == 0) { DCHECK(entry_block_ == NULL); DCHECK(exit_block_ == NULL); - DCHECK_EQ(num_blocks_, 0U); + DCHECK_EQ(GetNumBlocks(), 0U); // Use id 0 to represent a null block. BasicBlock* null_block = CreateNewBB(kNullBlock); DCHECK_EQ(null_block->id, NullBasicBlockId); @@ -874,6 +877,34 @@ uint64_t MIRGraph::GetDataFlowAttributes(MIR* mir) { return GetDataFlowAttributes(opcode); } +// The path can easily surpass FS limits because of parameters etc. Use pathconf to get FS +// restrictions here. Note that a successful invocation will return an actual value. If the path +// is too long for some reason, the return will be ENAMETOOLONG. Then cut off part of the name. +// +// It's possible the path is not valid, or some other errors appear. In that case return false. +static bool CreateDumpFile(std::string& fname, const char* dir_prefix, NarrowDexOffset start_offset, + const char *suffix, int nr, std::string* output) { + std::string dir = StringPrintf("./%s", dir_prefix); + int64_t max_name_length = pathconf(dir.c_str(), _PC_NAME_MAX); + if (max_name_length <= 0) { + PLOG(ERROR) << "Could not get file name restrictions for " << dir; + return false; + } + + std::string name = StringPrintf("%s%x%s_%d.dot", fname.c_str(), start_offset, + suffix == nullptr ? "" : suffix, nr); + std::string fpath; + if (static_cast<int64_t>(name.size()) > max_name_length) { + std::string suffix_str = StringPrintf("_%d.dot", nr); + name = name.substr(0, static_cast<size_t>(max_name_length) - suffix_str.size()) + suffix_str; + } + // Sanity check. + DCHECK_LE(name.size(), static_cast<size_t>(max_name_length)); + + *output = StringPrintf("%s%s", dir_prefix, name.c_str()); + return true; +} + // TODO: use a configurable base prefix, and adjust callers to supply pass name. /* Dump the CFG into a DOT graph */ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suffix) { @@ -882,15 +913,19 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff // Increment counter to get a unique file number. 
cnt++; + int nr = cnt.LoadRelaxed(); std::string fname(PrettyMethod(cu_->method_idx, *cu_->dex_file)); ReplaceSpecialChars(fname); - fname = StringPrintf("%s%s%x%s_%d.dot", dir_prefix, fname.c_str(), - GetBasicBlock(GetEntryBlock()->fall_through)->start_offset, - suffix == nullptr ? "" : suffix, - cnt.LoadRelaxed()); - file = fopen(fname.c_str(), "w"); + std::string fpath; + if (!CreateDumpFile(fname, dir_prefix, GetBasicBlock(GetEntryBlock()->fall_through)->start_offset, + suffix, nr, &fpath)) { + LOG(ERROR) << "Could not create dump file name for " << fname; + return; + } + file = fopen(fpath.c_str(), "w"); if (file == NULL) { + PLOG(ERROR) << "Could not open " << fpath << " for DumpCFG."; return; } fprintf(file, "digraph G {\n"); @@ -1740,6 +1775,9 @@ void MIRGraph::SSATransformationEnd() { // Update the maximum number of reachable blocks. max_num_reachable_blocks_ = num_reachable_blocks_; + + // Mark MIR SSA representations as up to date. + mir_ssa_rep_up_to_date_ = true; } size_t MIRGraph::GetNumDalvikInsns() const { @@ -2005,6 +2043,7 @@ void MIRGraph::ComputeTopologicalSortOrder() { topological_order_loop_head_stack_.clear(); topological_order_loop_head_stack_.reserve(max_nested_loops); max_nested_loops_ = max_nested_loops; + topological_order_up_to_date_ = true; } bool BasicBlock::IsExceptionBlock() const { @@ -2246,12 +2285,6 @@ void BasicBlock::Kill(MIRGraph* mir_graph) { } predecessors.clear(); - KillUnreachable(mir_graph); -} - -void BasicBlock::KillUnreachable(MIRGraph* mir_graph) { - DCHECK(predecessors.empty()); // Unreachable. - // Mark as dead and hidden. block_type = kDead; hidden = true; @@ -2270,9 +2303,6 @@ void BasicBlock::KillUnreachable(MIRGraph* mir_graph) { ChildBlockIterator iter(this, mir_graph); for (BasicBlock* succ_bb = iter.Next(); succ_bb != nullptr; succ_bb = iter.Next()) { succ_bb->ErasePredecessor(id); - if (succ_bb->predecessors.empty()) { - succ_bb->KillUnreachable(mir_graph); - } } // Remove links to children. @@ -2393,7 +2423,8 @@ void BasicBlock::UpdatePredecessor(BasicBlockId old_pred, BasicBlockId new_pred) // Create a new basic block with block_id as num_blocks_ that is // post-incremented. BasicBlock* MIRGraph::CreateNewBB(BBType block_type) { - BasicBlock* res = NewMemBB(block_type, num_blocks_++); + BasicBlockId id = static_cast<BasicBlockId>(block_list_.size()); + BasicBlock* res = NewMemBB(block_type, id); block_list_.push_back(res); return res; } @@ -2403,10 +2434,6 @@ void MIRGraph::CalculateBasicBlockInformation() { driver.Launch(); } -void MIRGraph::InitializeBasicBlockData() { - num_blocks_ = block_list_.size(); -} - int MIR::DecodedInstruction::FlagsOf() const { // Calculate new index. int idx = static_cast<int>(opcode) - kNumPackedOpcodes; diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 851ca150b5..af97f51975 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -410,18 +410,12 @@ class BasicBlock : public DeletableArenaObject<kArenaAllocBB> { /** * @brief Kill the BasicBlock. - * @details Unlink predecessors to make this block unreachable, then KillUnreachable(). + * @details Unlink predecessors and successors, remove all MIRs, set the block type to kDead + * and set hidden to true. */ void Kill(MIRGraph* mir_graph); /** - * @brief Kill the unreachable block and all blocks that become unreachable by killing this one. - * @details Set the block type to kDead and set hidden to true, remove all MIRs, - * unlink all successors and recursively kill successors that become unreachable. 
- */ - void KillUnreachable(MIRGraph* mir_graph); - - /** * @brief Is ssa_reg the last SSA definition of that VR in the block? */ bool IsSSALiveOut(const CompilationUnit* c_unit, int ssa_reg); @@ -574,7 +568,7 @@ class MIRGraph { } unsigned int GetNumBlocks() const { - return num_blocks_; + return block_list_.size(); } /** @@ -704,7 +698,9 @@ class MIRGraph { void DumpRegLocTable(RegLocation* table, int count); + void BasicBlockOptimizationStart(); void BasicBlockOptimization(); + void BasicBlockOptimizationEnd(); const ArenaVector<BasicBlockId>& GetTopologicalSortOrder() { DCHECK(!topological_order_.empty()); @@ -1198,7 +1194,6 @@ class MIRGraph { void AllocateSSAUseData(MIR *mir, int num_uses); void AllocateSSADefData(MIR *mir, int num_defs); void CalculateBasicBlockInformation(); - void InitializeBasicBlockData(); void ComputeDFSOrders(); void ComputeDefBlockMatrix(); void ComputeDominators(); @@ -1211,6 +1206,18 @@ class MIRGraph { return dfs_orders_up_to_date_; } + bool DominationUpToDate() const { + return domination_up_to_date_; + } + + bool MirSsaRepUpToDate() const { + return mir_ssa_rep_up_to_date_; + } + + bool TopologicalOrderUpToDate() const { + return topological_order_up_to_date_; + } + /* * IsDebugBuild sanity check: keep track of the Dex PCs for catch entries so that later on * we can verify that all catch entries have native PC entries. @@ -1321,6 +1328,9 @@ class MIRGraph { unsigned int num_reachable_blocks_; unsigned int max_num_reachable_blocks_; bool dfs_orders_up_to_date_; + bool domination_up_to_date_; + bool mir_ssa_rep_up_to_date_; + bool topological_order_up_to_date_; ArenaVector<BasicBlockId> dfs_order_; ArenaVector<BasicBlockId> dfs_post_order_; ArenaVector<BasicBlockId> dom_post_order_traversal_; @@ -1379,7 +1389,6 @@ class MIRGraph { ArenaBitVector* try_block_addr_; BasicBlock* entry_block_; BasicBlock* exit_block_; - unsigned int num_blocks_; const DexFile::CodeItem* current_code_item_; ArenaVector<uint16_t> dex_pc_to_block_map_; // FindBlock lookup cache. ArenaVector<DexCompilationUnit*> m_units_; // List of methods included in this graph diff --git a/compiler/dex/mir_graph_test.cc b/compiler/dex/mir_graph_test.cc index a96cd84297..8a7e71f4af 100644 --- a/compiler/dex/mir_graph_test.cc +++ b/compiler/dex/mir_graph_test.cc @@ -89,7 +89,6 @@ class TopologicalSortOrderTest : public testing::Test { cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo)); } } - cu_.mir_graph->num_blocks_ = count; ASSERT_EQ(count, cu_.mir_graph->block_list_.size()); cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1]; ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type); diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 6e9844cb7f..15b83413b7 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -485,9 +485,11 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { mir->ssa_rep->num_uses = 0; BasicBlock* successor_to_unlink = GetBasicBlock(edge_to_kill); successor_to_unlink->ErasePredecessor(bb->id); - if (successor_to_unlink->predecessors.empty()) { - successor_to_unlink->KillUnreachable(this); - } + // We have changed the graph structure. + dfs_orders_up_to_date_ = false; + domination_up_to_date_ = false; + topological_order_up_to_date_ = false; + // Keep MIR SSA rep, the worst that can happen is a Phi with just 1 input. 
} break; case Instruction::CMPL_FLOAT: @@ -649,36 +651,36 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { * Phi node only contains our two cases as input, we will use the result * SSA name of the Phi node as our select result and delete the Phi. If * the Phi node has more than two operands, we will arbitrarily use the SSA - * name of the "true" path, delete the SSA name of the "false" path from the + * name of the "false" path, delete the SSA name of the "true" path from the * Phi node (and fix up the incoming arc list). */ if (phi->ssa_rep->num_uses == 2) { mir->ssa_rep->defs[0] = phi->ssa_rep->defs[0]; - phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); + // Rather than changing the Phi to kMirOpNop, remove it completely. + // This avoids leaving other Phis after kMirOpNop (i.e. a non-Phi) insn. + tk_tk->RemoveMIR(phi); + int dead_false_def = if_false->ssa_rep->defs[0]; + raw_use_counts_[dead_false_def] = use_counts_[dead_false_def] = 0; } else { - int dead_def = if_false->ssa_rep->defs[0]; - int live_def = if_true->ssa_rep->defs[0]; + int live_def = if_false->ssa_rep->defs[0]; mir->ssa_rep->defs[0] = live_def; - BasicBlockId* incoming = phi->meta.phi_incoming; - for (int i = 0; i < phi->ssa_rep->num_uses; i++) { - if (phi->ssa_rep->uses[i] == live_def) { - incoming[i] = bb->id; - } - } - for (int i = 0; i < phi->ssa_rep->num_uses; i++) { - if (phi->ssa_rep->uses[i] == dead_def) { - int last_slot = phi->ssa_rep->num_uses - 1; - phi->ssa_rep->uses[i] = phi->ssa_rep->uses[last_slot]; - incoming[i] = incoming[last_slot]; - } - } - } - phi->ssa_rep->num_uses--; - bb->taken = NullBasicBlockId; - tk->block_type = kDead; - for (MIR* tmir = ft->first_mir_insn; tmir != NULL; tmir = tmir->next) { - tmir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); } + int dead_true_def = if_true->ssa_rep->defs[0]; + raw_use_counts_[dead_true_def] = use_counts_[dead_true_def] = 0; + // We want to remove ft and tk and link bb directly to ft_ft. First, we need + // to update all Phi inputs correctly with UpdatePredecessor(ft->id, bb->id) + // since the live_def above comes from ft->first_mir_insn (if_false). + DCHECK(if_false == ft->first_mir_insn); + ft_ft->UpdatePredecessor(ft->id, bb->id); + // Correct the rest of the links between bb, ft and ft_ft. + ft->ErasePredecessor(bb->id); + ft->fall_through = NullBasicBlockId; + bb->fall_through = ft_ft->id; + // Now we can kill tk and ft. + tk->Kill(this); + ft->Kill(this); + // NOTE: DFS order, domination info and topological order are still usable + // despite the newly dead blocks. } } } @@ -788,43 +790,9 @@ void MIRGraph::CombineBlocks(class BasicBlock* bb) { MIR* mir = bb->last_mir_insn; DCHECK(bb->first_mir_insn != nullptr); - // Grab the attributes from the paired opcode. + // Get the paired insn and check if it can still throw. MIR* throw_insn = mir->meta.throw_insn; - uint64_t df_attributes = GetDataFlowAttributes(throw_insn); - - // Don't combine if the throw_insn can still throw NPE. - if ((df_attributes & DF_HAS_NULL_CHKS) != 0 && - (throw_insn->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0) { - break; - } - // Now whitelist specific instructions. - bool ok = false; - if ((df_attributes & DF_IFIELD) != 0) { - // Combine only if fast, otherwise weird things can happen. - const MirIFieldLoweringInfo& field_info = GetIFieldLoweringInfo(throw_insn); - ok = (df_attributes & DF_DA) ? 
field_info.FastGet() : field_info.FastPut(); - } else if ((df_attributes & DF_SFIELD) != 0) { - // Combine only if fast, otherwise weird things can happen. - const MirSFieldLoweringInfo& field_info = GetSFieldLoweringInfo(throw_insn); - bool fast = ((df_attributes & DF_DA) ? field_info.FastGet() : field_info.FastPut()); - // Don't combine if the SGET/SPUT can call <clinit>(). - bool clinit = !field_info.IsClassInitialized() && - (throw_insn->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0; - ok = fast && !clinit; - } else if ((df_attributes & DF_HAS_RANGE_CHKS) != 0) { - // Only AGET/APUT have range checks. We have processed the AGET/APUT null check above. - DCHECK_NE(throw_insn->optimization_flags & MIR_IGNORE_NULL_CHECK, 0); - ok = ((throw_insn->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0); - } else if ((throw_insn->dalvikInsn.FlagsOf() & Instruction::kThrow) == 0) { - // We can encounter a non-throwing insn here thanks to inlining or other optimizations. - ok = true; - } else if (throw_insn->dalvikInsn.opcode == Instruction::ARRAY_LENGTH || - throw_insn->dalvikInsn.opcode == Instruction::FILL_ARRAY_DATA || - static_cast<int>(throw_insn->dalvikInsn.opcode) == kMirOpNullCheck) { - // No more checks for these (null check was processed above). - ok = true; - } - if (!ok) { + if (CanThrow(throw_insn)) { break; } @@ -863,9 +831,6 @@ void MIRGraph::CombineBlocks(class BasicBlock* bb) { BasicBlock* succ_bb = GetBasicBlock(succ_info->block); DCHECK(succ_bb->catch_entry); succ_bb->ErasePredecessor(bb->id); - if (succ_bb->predecessors.empty()) { - succ_bb->KillUnreachable(this); - } } } } @@ -908,8 +873,10 @@ void MIRGraph::CombineBlocks(class BasicBlock* bb) { child->UpdatePredecessor(bb_next->id, bb->id); } - // DFS orders are not up to date anymore. + // DFS orders, domination and topological order are not up to date anymore. dfs_orders_up_to_date_ = false; + domination_up_to_date_ = false; + topological_order_up_to_date_ = false; // Now, loop back and see if we can keep going } @@ -1581,7 +1548,7 @@ bool MIRGraph::BuildExtendedBBList(class BasicBlock* bb) { return false; // Not iterative - return value will be ignored } -void MIRGraph::BasicBlockOptimization() { +void MIRGraph::BasicBlockOptimizationStart() { if ((cu_->disable_opt & (1 << kLocalValueNumbering)) == 0) { temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack)); temp_.gvn.ifield_ids_ = @@ -1589,7 +1556,9 @@ void MIRGraph::BasicBlockOptimization() { temp_.gvn.sfield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), sfield_lowering_infos_); } +} +void MIRGraph::BasicBlockOptimization() { if ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) { ClearAllVisitedFlags(); PreOrderDfsIterator iter2(this); @@ -1606,7 +1575,9 @@ void MIRGraph::BasicBlockOptimization() { BasicBlockOpt(bb); } } +} +void MIRGraph::BasicBlockOptimizationEnd() { // Clean up after LVN. temp_.gvn.ifield_ids_ = nullptr; temp_.gvn.sfield_ids_ = nullptr; @@ -1719,32 +1690,37 @@ bool MIRGraph::CanThrow(MIR* mir) { const int opt_flags = mir->optimization_flags; uint64_t df_attributes = GetDataFlowAttributes(mir); + // First, check if the insn can still throw NPE. if (((df_attributes & DF_HAS_NULL_CHKS) != 0) && ((opt_flags & MIR_IGNORE_NULL_CHECK) == 0)) { return true; } + + // Now process specific instructions. if ((df_attributes & DF_IFIELD) != 0) { - // The IGET/IPUT family. + // The IGET/IPUT family. We have processed the IGET/IPUT null check above. 
+ DCHECK_NE(opt_flags & MIR_IGNORE_NULL_CHECK, 0); + // If not fast, weird things can happen and the insn can throw. const MirIFieldLoweringInfo& field_info = GetIFieldLoweringInfo(mir); - bool fast = (df_attributes & DF_DA) ? field_info.FastGet() : field_info.FastPut(); - // Already processed null check above. - if (fast) { - return false; - } - } else if ((df_attributes & DF_HAS_RANGE_CHKS) != 0) { - // The AGET/APUT family. - // Already processed null check above. - if ((opt_flags & MIR_IGNORE_RANGE_CHECK) != 0) { - return false; - } + bool fast = (df_attributes & DF_DA) != 0 ? field_info.FastGet() : field_info.FastPut(); + return !fast; } else if ((df_attributes & DF_SFIELD) != 0) { - // The SGET/SPUT family. + // The SGET/SPUT family. Check for potentially throwing class initialization. + // Also, if not fast, weird things can happen and the insn can throw. const MirSFieldLoweringInfo& field_info = GetSFieldLoweringInfo(mir); - bool fast = (df_attributes & DF_DA) ? field_info.FastGet() : field_info.FastPut(); + bool fast = (df_attributes & DF_DA) != 0 ? field_info.FastGet() : field_info.FastPut(); bool is_class_initialized = field_info.IsClassInitialized() || ((mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) != 0); - if (fast && is_class_initialized) { - return false; - } + return !(fast && is_class_initialized); + } else if ((df_attributes & DF_HAS_RANGE_CHKS) != 0) { + // Only AGET/APUT have range checks. We have processed the AGET/APUT null check above. + DCHECK_NE(opt_flags & MIR_IGNORE_NULL_CHECK, 0); + // Non-throwing only if range check has been eliminated. + return ((opt_flags & MIR_IGNORE_RANGE_CHECK) == 0); + } else if (mir->dalvikInsn.opcode == Instruction::ARRAY_LENGTH || + mir->dalvikInsn.opcode == Instruction::FILL_ARRAY_DATA || + static_cast<int>(mir->dalvikInsn.opcode) == kMirOpNullCheck) { + // No more checks for these (null check was processed above). + return false; } return true; } diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index 6c2e9c0b27..362c7fdc05 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -129,7 +129,6 @@ class MirOptimizationTest : public testing::Test { cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo)); } } - cu_.mir_graph->num_blocks_ = count; ASSERT_EQ(count, cu_.mir_graph->block_list_.size()); cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1]; ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type); diff --git a/compiler/dex/pass_driver_me_post_opt.cc b/compiler/dex/pass_driver_me_post_opt.cc index e6238e9f25..9b56c0da87 100644 --- a/compiler/dex/pass_driver_me_post_opt.cc +++ b/compiler/dex/pass_driver_me_post_opt.cc @@ -31,20 +31,19 @@ namespace art { // The initial list of passes to be used by the PassDriveMEPostOpt. 
template<> const Pass* const PassDriver<PassDriverMEPostOpt>::g_passes[] = { - GetPassInstance<InitializeData>(), - GetPassInstance<ClearPhiInstructions>(), - GetPassInstance<DFSOrders>(), - GetPassInstance<BuildDomination>(), - GetPassInstance<TopologicalSortOrders>(), - GetPassInstance<DefBlockMatrix>(), - GetPassInstance<CreatePhiNodes>(), - GetPassInstance<ClearVisitedFlag>(), - GetPassInstance<SSAConversion>(), - GetPassInstance<PhiNodeOperands>(), - GetPassInstance<ConstantPropagation>(), - GetPassInstance<PerformInitRegLocations>(), - GetPassInstance<MethodUseCount>(), - GetPassInstance<FreeData>(), + GetPassInstance<DFSOrders>(), + GetPassInstance<BuildDomination>(), + GetPassInstance<TopologicalSortOrders>(), + GetPassInstance<InitializeSSATransformation>(), + GetPassInstance<ClearPhiInstructions>(), + GetPassInstance<DefBlockMatrix>(), + GetPassInstance<CreatePhiNodes>(), + GetPassInstance<SSAConversion>(), + GetPassInstance<PhiNodeOperands>(), + GetPassInstance<ConstantPropagation>(), + GetPassInstance<PerformInitRegLocations>(), + GetPassInstance<MethodUseCount>(), + GetPassInstance<FinishSSATransformation>(), }; // The number of the passes in the initial list of Passes (g_passes). diff --git a/compiler/dex/post_opt_passes.h b/compiler/dex/post_opt_passes.h index 7b84ba88c5..964355bb5d 100644 --- a/compiler/dex/post_opt_passes.h +++ b/compiler/dex/post_opt_passes.h @@ -24,13 +24,31 @@ namespace art { /** - * @class InitializeData + * @class PassMEMirSsaRep + * @brief Convenience class for passes that check MIRGraph::MirSsaRepUpToDate(). + */ +class PassMEMirSsaRep : public PassME { + public: + PassMEMirSsaRep(const char* name, DataFlowAnalysisMode type = kAllNodes) + : PassME(name, type) { + } + + bool Gate(const PassDataHolder* data) const OVERRIDE { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + return !c_unit->mir_graph->MirSsaRepUpToDate(); + } +}; + +/** + * @class InitializeSSATransformation * @brief There is some data that needs to be initialized before performing * the post optimization passes. */ -class InitializeData : public PassME { +class InitializeSSATransformation : public PassMEMirSsaRep { public: - InitializeData() : PassME("InitializeData", kNoNodes) { + InitializeSSATransformation() : PassMEMirSsaRep("InitializeSSATransformation", kNoNodes) { } void Start(PassDataHolder* data) const { @@ -39,8 +57,8 @@ class InitializeData : public PassME { DCHECK(data != nullptr); CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); - c_unit->mir_graph.get()->InitializeBasicBlockData(); - c_unit->mir_graph.get()->SSATransformationStart(); + c_unit->mir_graph->SSATransformationStart(); + c_unit->mir_graph->CompilerInitializeSSAConversion(); } }; @@ -62,9 +80,9 @@ class MethodUseCount : public PassME { * @class ClearPhiInformation * @brief Clear the PHI nodes from the CFG. 
*/ -class ClearPhiInstructions : public PassME { +class ClearPhiInstructions : public PassMEMirSsaRep { public: - ClearPhiInstructions() : PassME("ClearPhiInstructions") { + ClearPhiInstructions() : PassMEMirSsaRep("ClearPhiInstructions") { } bool Worker(PassDataHolder* data) const; @@ -115,12 +133,18 @@ class BuildDomination : public PassME { BuildDomination() : PassME("BuildDomination", kNoNodes) { } + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + return !c_unit->mir_graph->DominationUpToDate(); + } + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); - c_unit->mir_graph.get()->ComputeDominators(); - c_unit->mir_graph.get()->CompilerInitializeSSAConversion(); + c_unit->mir_graph->ComputeDominators(); } void End(PassDataHolder* data) const { @@ -143,6 +167,13 @@ class TopologicalSortOrders : public PassME { TopologicalSortOrders() : PassME("TopologicalSortOrders", kNoNodes) { } + bool Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + return !c_unit->mir_graph->TopologicalOrderUpToDate(); + } + void Start(PassDataHolder* data) const { DCHECK(data != nullptr); CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; @@ -155,9 +186,9 @@ class TopologicalSortOrders : public PassME { * @class DefBlockMatrix * @brief Calculate the matrix of definition per basic block */ -class DefBlockMatrix : public PassME { +class DefBlockMatrix : public PassMEMirSsaRep { public: - DefBlockMatrix() : PassME("DefBlockMatrix", kNoNodes) { + DefBlockMatrix() : PassMEMirSsaRep("DefBlockMatrix", kNoNodes) { } void Start(PassDataHolder* data) const { @@ -172,9 +203,9 @@ class DefBlockMatrix : public PassME { * @class CreatePhiNodes * @brief Pass to create the phi nodes after SSA calculation */ -class CreatePhiNodes : public PassME { +class CreatePhiNodes : public PassMEMirSsaRep { public: - CreatePhiNodes() : PassME("CreatePhiNodes", kNoNodes) { + CreatePhiNodes() : PassMEMirSsaRep("CreatePhiNodes", kNoNodes) { } void Start(PassDataHolder* data) const { @@ -186,30 +217,12 @@ class CreatePhiNodes : public PassME { }; /** - * @class ClearVisitedFlag - * @brief Pass to clear the visited flag for all basic blocks. 
- */ - -class ClearVisitedFlag : public PassME { - public: - ClearVisitedFlag() : PassME("ClearVisitedFlag", kNoNodes) { - } - - void Start(PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; - DCHECK(c_unit != nullptr); - c_unit->mir_graph.get()->ClearAllVisitedFlags(); - } -}; - -/** * @class SSAConversion * @brief Pass for SSA conversion of MIRs */ -class SSAConversion : public PassME { +class SSAConversion : public PassMEMirSsaRep { public: - SSAConversion() : PassME("SSAConversion", kNoNodes) { + SSAConversion() : PassMEMirSsaRep("SSAConversion", kNoNodes) { } void Start(PassDataHolder* data) const { @@ -217,6 +230,7 @@ class SSAConversion : public PassME { CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); MIRGraph *mir_graph = c_unit->mir_graph.get(); + mir_graph->ClearAllVisitedFlags(); mir_graph->DoDFSPreOrderSSARename(mir_graph->GetEntryBlock()); } }; @@ -225,9 +239,9 @@ class SSAConversion : public PassME { * @class PhiNodeOperands * @brief Pass to insert the Phi node operands to basic blocks */ -class PhiNodeOperands : public PassME { +class PhiNodeOperands : public PassMEMirSsaRep { public: - PhiNodeOperands() : PassME("PhiNodeOperands", kPreOrderDFSTraversal) { + PhiNodeOperands() : PassMEMirSsaRep("PhiNodeOperands", kPreOrderDFSTraversal) { } bool Worker(PassDataHolder* data) const { @@ -246,9 +260,9 @@ class PhiNodeOperands : public PassME { * @class InitRegLocations * @brief Initialize Register Locations. */ -class PerformInitRegLocations : public PassME { +class PerformInitRegLocations : public PassMEMirSsaRep { public: - PerformInitRegLocations() : PassME("PerformInitRegLocation", kNoNodes) { + PerformInitRegLocations() : PassMEMirSsaRep("PerformInitRegLocation", kNoNodes) { } void Start(PassDataHolder* data) const { @@ -263,9 +277,9 @@ class PerformInitRegLocations : public PassME { * @class ConstantPropagation * @brief Perform a constant propagation pass. */ -class ConstantPropagation : public PassME { +class ConstantPropagation : public PassMEMirSsaRep { public: - ConstantPropagation() : PassME("ConstantPropagation") { + ConstantPropagation() : PassMEMirSsaRep("ConstantPropagation") { } bool Worker(PassDataHolder* data) const { @@ -288,12 +302,12 @@ class ConstantPropagation : public PassME { }; /** - * @class FreeData + * @class FinishSSATransformation * @brief There is some data that needs to be freed after performing the post optimization passes. 
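The SSAConversion change above folds the deleted ClearVisitedFlag pass into the conversion itself: the visited flags are reset right before the pre-order DFS rename, so a stale flag left by an earlier traversal cannot cause blocks to be skipped. A tiny illustration of that reset-then-walk pattern follows; BasicBlock here is a stand-in, not the MIRGraph type.

#include <cstdio>
#include <vector>

struct BasicBlock {
  int id = 0;
  bool visited = false;
  std::vector<BasicBlock*> successors;
};

static void ClearAllVisitedFlags(std::vector<BasicBlock>& blocks) {
  for (BasicBlock& bb : blocks) {
    bb.visited = false;
  }
}

// Stand-in for the pre-order walk that performs the SSA rename per block.
static void DoDFSPreOrder(BasicBlock* bb) {
  if (bb == nullptr || bb->visited) {
    return;
  }
  bb->visited = true;
  printf("rename block %d\n", bb->id);
  for (BasicBlock* succ : bb->successors) {
    DoDFSPreOrder(succ);
  }
}

int main() {
  std::vector<BasicBlock> blocks(3);
  for (int i = 0; i < 3; ++i) blocks[i].id = i;
  blocks[0].successors = {&blocks[1], &blocks[2]};
  blocks[1].successors = {&blocks[2]};
  ClearAllVisitedFlags(blocks);  // reset immediately before the walk
  DoDFSPreOrder(&blocks[0]);
}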
*/ -class FreeData : public PassME { +class FinishSSATransformation : public PassMEMirSsaRep { public: - FreeData() : PassME("FreeData", kNoNodes) { + FinishSSATransformation() : PassMEMirSsaRep("FinishSSATransformation", kNoNodes) { } void End(PassDataHolder* data) const { diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index 99b2166030..0713b7a18e 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -23,6 +23,7 @@ #include "mirror/art_method.h" #include "mirror/object_array-inl.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "utils.h" namespace art { diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index fe1d12610a..03e0e92aec 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -23,6 +23,7 @@ #include "dex/reg_storage_eq.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/array-inl.h" +#include "utils.h" namespace art { @@ -567,21 +568,29 @@ bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div // Try to convert *lit to 1 RegRegRegShift/RegRegShift form. bool ArmMir2Lir::GetEasyMultiplyOp(int lit, ArmMir2Lir::EasyMultiplyOp* op) { + if (lit == 0) { + // Special case for *divide-by-zero*. The ops won't actually be used to generate code, as + // GenArithOpIntLit will directly generate exception-throwing code, and multiply-by-zero will + // have been optimized away earlier. + op->op = kOpInvalid; + return true; + } + if (IsPowerOfTwo(lit)) { op->op = kOpLsl; - op->shift = LowestSetBit(lit); + op->shift = CTZ(lit); return true; } if (IsPowerOfTwo(lit - 1)) { op->op = kOpAdd; - op->shift = LowestSetBit(lit - 1); + op->shift = CTZ(lit - 1); return true; } if (IsPowerOfTwo(lit + 1)) { op->op = kOpRsub; - op->shift = LowestSetBit(lit + 1); + op->shift = CTZ(lit + 1); return true; } @@ -599,7 +608,7 @@ bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) { } int lit1 = lit; - uint32_t shift = LowestSetBit(lit1); + uint32_t shift = CTZ(lit1); if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { ops[1].op = kOpLsl; ops[1].shift = shift; @@ -607,7 +616,7 @@ bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) { } lit1 = lit - 1; - shift = LowestSetBit(lit1); + shift = CTZ(lit1); if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { ops[1].op = kOpAdd; ops[1].shift = shift; @@ -615,7 +624,7 @@ bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) { } lit1 = lit + 1; - shift = LowestSetBit(lit1); + shift = CTZ(lit1); if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { ops[1].op = kOpRsub; ops[1].shift = shift; diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index d3743531fb..b05939156f 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -749,6 +749,7 @@ void ArmMir2Lir::FreeCallTemps() { FreeTemp(rs_r1); FreeTemp(rs_r2); FreeTemp(rs_r3); + FreeTemp(TargetReg(kHiddenArg)); if (!kArm32QuickCodeUseSoftFloat) { FreeTemp(rs_fr0); FreeTemp(rs_fr1); diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 5ac2aa080d..88ab6f82d4 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -543,7 +543,7 @@ bool Arm64Mir2Lir::HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_d return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int32_t>(lit)); } } - int k = LowestSetBit(lit); + 
int k = CTZ(lit); if (k >= nbits - 2) { // Avoid special cases. return false; diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 030c5ed2f4..ee7e818f85 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -759,6 +759,7 @@ void Arm64Mir2Lir::FreeCallTemps() { FreeTemp(rs_f5); FreeTemp(rs_f6); FreeTemp(rs_f7); + FreeTemp(TargetReg(kHiddenArg)); } RegStorage Arm64Mir2Lir::LoadHelper(QuickEntrypointEnum trampoline) { diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index cc61e93d82..ae9b0f4baf 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1086,12 +1086,20 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { }); std::unique_ptr<std::vector<uint8_t>> cfi_info(ReturnFrameDescriptionEntry()); - CompiledMethod* result = - new CompiledMethod(cu_->compiler_driver, cu_->instruction_set, code_buffer_, frame_size_, - core_spill_mask_, fp_spill_mask_, &src_mapping_table_, encoded_mapping_table_, - vmap_encoder.GetData(), native_gc_map_, cfi_info.get(), - ArrayRef<LinkerPatch>(patches_)); - return result; + ArrayRef<const uint8_t> cfi_ref; + if (cfi_info.get() != nullptr) { + cfi_ref = ArrayRef<const uint8_t>(*cfi_info); + } + return CompiledMethod::SwapAllocCompiledMethod( + cu_->compiler_driver, cu_->instruction_set, + ArrayRef<const uint8_t>(code_buffer_), + frame_size_, core_spill_mask_, fp_spill_mask_, + &src_mapping_table_, + ArrayRef<const uint8_t>(encoded_mapping_table_), + ArrayRef<const uint8_t>(vmap_encoder.GetData()), + ArrayRef<const uint8_t>(native_gc_map_), + cfi_ref, + ArrayRef<LinkerPatch>(patches_)); } size_t Mir2Lir::GetMaxPossibleCompilerTemps() const { @@ -1167,24 +1175,6 @@ void Mir2Lir::InsertLIRAfter(LIR* current_lir, LIR* new_lir) { new_lir->next->prev = new_lir; } -bool Mir2Lir::IsPowerOfTwo(uint64_t x) { - return (x & (x - 1)) == 0; -} - -// Returns the index of the lowest set bit in 'x'. -int32_t Mir2Lir::LowestSetBit(uint64_t x) { - int bit_posn = 0; - while ((x & 0xf) == 0) { - bit_posn += 4; - x >>= 4; - } - while ((x & 1) == 0) { - bit_posn++; - x >>= 1; - } - return bit_posn; -} - bool Mir2Lir::PartiallyIntersects(RegLocation rl_src, RegLocation rl_dest) { DCHECK(rl_src.wide); DCHECK(rl_dest.wide); diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 50014b05b1..3733507a50 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -13,6 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +#include <functional> + #include "arch/arm/instruction_set_features_arm.h" #include "dex/compiler_ir.h" #include "dex/compiler_internals.h" @@ -23,8 +26,8 @@ #include "mirror/object_array-inl.h" #include "mirror/object-inl.h" #include "mirror/object_reference.h" +#include "utils.h" #include "verifier/method_verifier.h" -#include <functional> namespace art { @@ -371,19 +374,19 @@ void Mir2Lir::GenNewArray(uint32_t type_idx, RegLocation rl_dest, // The fast path. if (!use_direct_type_ptr) { LoadClassType(*dex_file, type_idx, kArg0); - CallRuntimeHelperRegMethodRegLocation(kQuickAllocArrayResolved, TargetReg(kArg0, kNotWide), + CallRuntimeHelperRegRegLocationMethod(kQuickAllocArrayResolved, TargetReg(kArg0, kNotWide), rl_src, true); } else { // Use the direct pointer. 
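The removed Mir2Lir::LowestSetBit() above scanned for the lowest set bit a nibble and then a bit at a time; call sites now use CTZ() from utils.h, which is assumed here to map to the compiler's count-trailing-zeros intrinsic. A quick self-contained check that the two agree for non-zero inputs (neither is meaningful for zero, which callers already guard against):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Verbatim logic of the deleted helper, kept only for comparison.
static int32_t LowestSetBitLoop(uint64_t x) {
  int bit_posn = 0;
  while ((x & 0xf) == 0) { bit_posn += 4; x >>= 4; }
  while ((x & 1) == 0) { bit_posn++; x >>= 1; }
  return bit_posn;
}

// Stand-in for CTZ(); the real helper is templated over the integer width.
static int32_t CTZ(uint64_t x) { return __builtin_ctzll(x); }

int main() {
  for (uint64_t x : {1ull, 2ull, 96ull, 0xF000ull, 1ull << 33}) {
    assert(LowestSetBitLoop(x) == CTZ(x));
  }
  printf("LowestSetBit and CTZ agree\n");
}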
- CallRuntimeHelperImmMethodRegLocation(kQuickAllocArrayResolved, direct_type_ptr, rl_src, + CallRuntimeHelperImmRegLocationMethod(kQuickAllocArrayResolved, direct_type_ptr, rl_src, true); } } else { // The slow path. - CallRuntimeHelperImmMethodRegLocation(kQuickAllocArray, type_idx, rl_src, true); + CallRuntimeHelperImmRegLocationMethod(kQuickAllocArray, type_idx, rl_src, true); } } else { - CallRuntimeHelperImmMethodRegLocation(kQuickAllocArrayWithAccessCheck, type_idx, rl_src, true); + CallRuntimeHelperImmRegLocationMethod(kQuickAllocArrayWithAccessCheck, type_idx, rl_src, true); } StoreValue(rl_dest, GetReturn(kRefReg)); } @@ -405,7 +408,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { } else { target = kQuickCheckAndAllocArrayWithAccessCheck; } - CallRuntimeHelperImmMethodImm(target, type_idx, elems, true); + CallRuntimeHelperImmImmMethod(target, type_idx, elems, true); FreeTemp(TargetReg(kArg2, kNotWide)); FreeTemp(TargetReg(kArg1, kNotWide)); /* @@ -1098,7 +1101,7 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { void Compile() { GenerateTargetLabel(); - m2l_->CallRuntimeHelperRegImm(kQuickResolveString, r_method_, string_idx_, true); + m2l_->CallRuntimeHelperImmReg(kQuickResolveString, string_idx_, r_method_, true); m2l_->OpUnconditionalBranch(cont_); } @@ -1733,7 +1736,7 @@ bool Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div, if ((cu_->instruction_set == kThumb2) && !IsPowerOfTwo(lit)) { return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, lit); } - int k = LowestSetBit(lit); + int k = CTZ(lit); if (k >= 30) { // Avoid special cases. return false; @@ -1813,18 +1816,18 @@ bool Mir2Lir::HandleEasyMultiply(RegLocation rl_src, RegLocation rl_dest, int li RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); if (power_of_two) { // Shift. - OpRegRegImm(kOpLsl, rl_result.reg, rl_src.reg, LowestSetBit(lit)); + OpRegRegImm(kOpLsl, rl_result.reg, rl_src.reg, CTZ(lit)); } else if (pop_count_le2) { // Shift and add and shift. - int first_bit = LowestSetBit(lit); - int second_bit = LowestSetBit(lit ^ (1 << first_bit)); + int first_bit = CTZ(lit); + int second_bit = CTZ(lit ^ (1 << first_bit)); GenMultiplyByTwoBitMultiplier(rl_src, rl_result, lit, first_bit, second_bit); } else { // Reverse subtract: (src << (shift + 1)) - src. 
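The easy-multiply paths above replace a multiply by a small literal with a shift, a shift-plus-add, or a shift-plus-reverse-subtract, depending on whether lit, lit - 1, or lit + 1 is a power of two. A self-contained sketch of that strength reduction, mirroring the single-operation cases of the ARM GetEasyMultiplyOp shown earlier; CTZ here wraps __builtin_ctz and is only a stand-in for the utils.h helper.

#include <cassert>
#include <cstdint>
#include <cstdio>

static bool IsPowerOfTwo(uint32_t x) { return x != 0 && (x & (x - 1)) == 0; }
static int CTZ(uint32_t x) { return __builtin_ctz(x); }

// Computes src * lit using at most a shift plus one add/sub, when possible.
static bool EasyMultiply(int32_t src, int32_t lit, int32_t* out) {
  if (IsPowerOfTwo(lit)) {            // x * 2^k      ->  x << k
    *out = src << CTZ(lit);
    return true;
  }
  if (IsPowerOfTwo(lit - 1)) {        // x * (2^k+1)  ->  (x << k) + x
    *out = (src << CTZ(lit - 1)) + src;
    return true;
  }
  if (IsPowerOfTwo(lit + 1)) {        // x * (2^k-1)  ->  (x << k) - x  (reverse subtract)
    *out = (src << CTZ(lit + 1)) - src;
    return true;
  }
  return false;  // fall back to a real multiply
}

int main() {
  int32_t r;
  assert(EasyMultiply(7, 8, &r) && r == 56);    // shift only
  assert(EasyMultiply(7, 9, &r) && r == 63);    // shift + add
  assert(EasyMultiply(7, 15, &r) && r == 105);  // shift + reverse subtract
  printf("ok\n");
}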
DCHECK(power_of_two_minus_one); - // TUNING: rsb dst, src, src lsl#LowestSetBit(lit + 1) + // TUNING: rsb dst, src, src lsl#CTZ(lit + 1) RegStorage t_reg = AllocTemp(); - OpRegRegImm(kOpLsl, t_reg, rl_src.reg, LowestSetBit(lit + 1)); + OpRegRegImm(kOpLsl, t_reg, rl_src.reg, CTZ(lit + 1)); OpRegRegReg(kOpSub, rl_result.reg, t_reg, rl_src.reg); } StoreValue(rl_dest, rl_result); diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 9462d3d08f..c99be641a9 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -201,16 +201,16 @@ void Mir2Lir::CallRuntimeHelperRegMethod(QuickEntrypointEnum trampoline, RegStor CallHelper(r_tgt, trampoline, safepoint_pc); } -void Mir2Lir::CallRuntimeHelperRegMethodRegLocation(QuickEntrypointEnum trampoline, RegStorage arg0, - RegLocation arg2, bool safepoint_pc) { +void Mir2Lir::CallRuntimeHelperRegRegLocationMethod(QuickEntrypointEnum trampoline, RegStorage arg0, + RegLocation arg1, bool safepoint_pc) { RegStorage r_tgt = CallHelperSetup(trampoline); - DCHECK(!IsSameReg(TargetReg(kArg1, arg0.GetWideKind()), arg0)); + DCHECK(!IsSameReg(TargetReg(kArg2, arg0.GetWideKind()), arg0)); RegStorage r_tmp = TargetReg(kArg0, arg0.GetWideKind()); if (r_tmp.NotExactlyEquals(arg0)) { OpRegCopy(r_tmp, arg0); } - LoadCurrMethodDirect(TargetReg(kArg1, kRef)); - LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2)); + LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1)); + LoadCurrMethodDirect(TargetReg(kArg2, kRef)); ClobberCallerSave(); CallHelper(r_tgt, trampoline, safepoint_pc); } @@ -306,21 +306,21 @@ void Mir2Lir::CallRuntimeHelperRegRegImm(QuickEntrypointEnum trampoline, RegStor CallHelper(r_tgt, trampoline, safepoint_pc); } -void Mir2Lir::CallRuntimeHelperImmMethodRegLocation(QuickEntrypointEnum trampoline, int arg0, - RegLocation arg2, bool safepoint_pc) { +void Mir2Lir::CallRuntimeHelperImmRegLocationMethod(QuickEntrypointEnum trampoline, int arg0, + RegLocation arg1, bool safepoint_pc) { RegStorage r_tgt = CallHelperSetup(trampoline); - LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2)); - LoadCurrMethodDirect(TargetReg(kArg1, kRef)); + LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1)); + LoadCurrMethodDirect(TargetReg(kArg2, kRef)); LoadConstant(TargetReg(kArg0, kNotWide), arg0); ClobberCallerSave(); CallHelper(r_tgt, trampoline, safepoint_pc); } -void Mir2Lir::CallRuntimeHelperImmMethodImm(QuickEntrypointEnum trampoline, int arg0, int arg2, +void Mir2Lir::CallRuntimeHelperImmImmMethod(QuickEntrypointEnum trampoline, int arg0, int arg1, bool safepoint_pc) { RegStorage r_tgt = CallHelperSetup(trampoline); - LoadCurrMethodDirect(TargetReg(kArg1, kRef)); - LoadConstant(TargetReg(kArg2, kNotWide), arg2); + LoadCurrMethodDirect(TargetReg(kArg2, kRef)); + LoadConstant(TargetReg(kArg1, kNotWide), arg1); LoadConstant(TargetReg(kArg0, kNotWide), arg0); ClobberCallerSave(); CallHelper(r_tgt, trampoline, safepoint_pc); @@ -1557,7 +1557,7 @@ void Mir2Lir::GenInvokeNoInline(CallInfo* info) { LIR* call_insn = GenCallInsn(method_info); MarkSafepointPC(call_insn); - ClobberCallerSave(); + FreeCallTemps(); if (info->result.location != kLocInvalid) { // We have a following MOVE_RESULT - do it now. 
if (info->result.wide) { diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index efa130c65d..c22ba04e08 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -445,6 +445,7 @@ void MipsMir2Lir::FreeCallTemps() { FreeTemp(rs_rMIPS_ARG1); FreeTemp(rs_rMIPS_ARG2); FreeTemp(rs_rMIPS_ARG3); + FreeTemp(TargetReg(kHiddenArg)); } bool MipsMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind ATTRIBUTE_UNUSED) { diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 524ee21e63..1ff64c9ee7 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -603,13 +603,13 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list } case Instruction::AGET_WIDE: - GenArrayGet(opt_flags, k64, rl_src[0], rl_src[1], rl_dest, 3); + GenArrayGet(opt_flags, rl_dest.fp ? kDouble : k64, rl_src[0], rl_src[1], rl_dest, 3); break; case Instruction::AGET_OBJECT: GenArrayGet(opt_flags, kReference, rl_src[0], rl_src[1], rl_dest, 2); break; case Instruction::AGET: - GenArrayGet(opt_flags, k32, rl_src[0], rl_src[1], rl_dest, 2); + GenArrayGet(opt_flags, rl_dest.fp ? kSingle : k32, rl_src[0], rl_src[1], rl_dest, 2); break; case Instruction::AGET_BOOLEAN: GenArrayGet(opt_flags, kUnsignedByte, rl_src[0], rl_src[1], rl_dest, 0); @@ -624,10 +624,10 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list GenArrayGet(opt_flags, kSignedHalf, rl_src[0], rl_src[1], rl_dest, 1); break; case Instruction::APUT_WIDE: - GenArrayPut(opt_flags, k64, rl_src[1], rl_src[2], rl_src[0], 3, false); + GenArrayPut(opt_flags, rl_src[0].fp ? kDouble : k64, rl_src[1], rl_src[2], rl_src[0], 3, false); break; case Instruction::APUT: - GenArrayPut(opt_flags, k32, rl_src[1], rl_src[2], rl_src[0], 2, false); + GenArrayPut(opt_flags, rl_src[0].fp ? kSingle : k32, rl_src[1], rl_src[2], rl_src[0], 2, false); break; case Instruction::APUT_OBJECT: { bool is_null = mir_graph_->IsConstantNullRef(rl_src[0]); @@ -661,11 +661,19 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::IGET_WIDE: // kPrimLong and kPrimDouble share the same entrypoints. - GenIGet(mir, opt_flags, k64, Primitive::kPrimLong, rl_dest, rl_src[0]); + if (rl_dest.fp) { + GenIGet(mir, opt_flags, kDouble, Primitive::kPrimDouble, rl_dest, rl_src[0]); + } else { + GenIGet(mir, opt_flags, k64, Primitive::kPrimLong, rl_dest, rl_src[0]); + } break; case Instruction::IGET: - GenIGet(mir, opt_flags, k32, Primitive::kPrimInt, rl_dest, rl_src[0]); + if (rl_dest.fp) { + GenIGet(mir, opt_flags, kSingle, Primitive::kPrimFloat, rl_dest, rl_src[0]); + } else { + GenIGet(mir, opt_flags, k32, Primitive::kPrimInt, rl_dest, rl_src[0]); + } break; case Instruction::IGET_CHAR: @@ -685,7 +693,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list break; case Instruction::IPUT_WIDE: - GenIPut(mir, opt_flags, k64, rl_src[0], rl_src[1]); + GenIPut(mir, opt_flags, rl_src[0].fp ? kDouble : k64, rl_src[0], rl_src[1]); break; case Instruction::IPUT_OBJECT: @@ -693,7 +701,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list break; case Instruction::IPUT: - GenIPut(mir, opt_flags, k32, rl_src[0], rl_src[1]); + GenIPut(mir, opt_flags, rl_src[0].fp ? 
kSingle : k32, rl_src[0], rl_src[1]); break; case Instruction::IPUT_BYTE: @@ -714,7 +722,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list break; case Instruction::SGET: - GenSget(mir, rl_dest, k32, Primitive::kPrimInt); + GenSget(mir, rl_dest, rl_dest.fp ? kSingle : k32, Primitive::kPrimInt); break; case Instruction::SGET_CHAR: @@ -735,7 +743,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::SGET_WIDE: // kPrimLong and kPrimDouble share the same entrypoints. - GenSget(mir, rl_dest, k64, Primitive::kPrimLong); + GenSget(mir, rl_dest, rl_dest.fp ? kDouble : k64, Primitive::kPrimDouble); break; case Instruction::SPUT_OBJECT: @@ -743,7 +751,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list break; case Instruction::SPUT: - GenSput(mir, rl_src[0], k32); + GenSput(mir, rl_src[0], rl_src[0].fp ? kSingle : k32); break; case Instruction::SPUT_BYTE: @@ -761,7 +769,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::SPUT_WIDE: - GenSput(mir, rl_src[0], k64); + GenSput(mir, rl_src[0], rl_src[0].fp ? kDouble : k64); break; case Instruction::INVOKE_STATIC_RANGE: diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index a2b85ffb6c..5f8a71c2b7 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -874,17 +874,17 @@ class Mir2Lir : public Backend { void CallRuntimeHelperImmMethod(QuickEntrypointEnum trampoline, int arg0, bool safepoint_pc); void CallRuntimeHelperRegMethod(QuickEntrypointEnum trampoline, RegStorage arg0, bool safepoint_pc); - void CallRuntimeHelperRegMethodRegLocation(QuickEntrypointEnum trampoline, RegStorage arg0, - RegLocation arg2, bool safepoint_pc); + void CallRuntimeHelperRegRegLocationMethod(QuickEntrypointEnum trampoline, RegStorage arg0, + RegLocation arg1, bool safepoint_pc); void CallRuntimeHelperRegLocationRegLocation(QuickEntrypointEnum trampoline, RegLocation arg0, RegLocation arg1, bool safepoint_pc); void CallRuntimeHelperRegReg(QuickEntrypointEnum trampoline, RegStorage arg0, RegStorage arg1, bool safepoint_pc); void CallRuntimeHelperRegRegImm(QuickEntrypointEnum trampoline, RegStorage arg0, RegStorage arg1, int arg2, bool safepoint_pc); - void CallRuntimeHelperImmMethodRegLocation(QuickEntrypointEnum trampoline, int arg0, - RegLocation arg2, bool safepoint_pc); - void CallRuntimeHelperImmMethodImm(QuickEntrypointEnum trampoline, int arg0, int arg2, + void CallRuntimeHelperImmRegLocationMethod(QuickEntrypointEnum trampoline, int arg0, + RegLocation arg1, bool safepoint_pc); + void CallRuntimeHelperImmImmMethod(QuickEntrypointEnum trampoline, int arg0, int arg1, bool safepoint_pc); void CallRuntimeHelperImmRegLocationRegLocation(QuickEntrypointEnum trampoline, int arg0, RegLocation arg1, RegLocation arg2, @@ -1482,18 +1482,6 @@ class Mir2Lir : public Backend { return cu_; } /* - * @brief Returns the index of the lowest set bit in 'x'. - * @param x Value to be examined. - * @returns The bit number of the lowest bit set in the value. - */ - int32_t LowestSetBit(uint64_t x); - /* - * @brief Is this value a power of two? - * @param x Value to be examined. - * @returns 'true' if only 1 bit is set in the value. - */ - bool IsPowerOfTwo(uint64_t x); - /* * @brief Do these SRs overlap? 
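The AGET/APUT/IGET/IPUT/SGET/SPUT cases above now pick the op size from the register location's fp flag, so float and double values are routed through FP registers instead of core registers. A small sketch of that selection; the OpSize values and the rl.fp/rl.wide flags mirror the Quick definitions, but the helper itself is illustrative, not ART code.

#include <cstdio>

enum OpSize { k32, k64, kSingle, kDouble };

struct RegLocation {
  bool fp;    // value lives in a floating-point register
  bool wide;  // 64-bit value
};

static OpSize MemoryOpSizeFor(const RegLocation& rl) {
  if (rl.wide) {
    return rl.fp ? kDouble : k64;   // APUT_WIDE / IGET_WIDE / SGET_WIDE ...
  }
  return rl.fp ? kSingle : k32;     // APUT / IGET / SGET ...
}

int main() {
  RegLocation float_loc{true, false};
  RegLocation long_loc{false, true};
  printf("float -> %d, long -> %d\n", MemoryOpSizeFor(float_loc), MemoryOpSizeFor(long_loc));
}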
* @param rl_op1 One RegLocation * @param rl_op2 The other RegLocation @@ -1741,7 +1729,7 @@ class Mir2Lir : public Backend { int live_sreg_; CodeBuffer code_buffer_; // The source mapping table data (pc -> dex). More entries than in encoded_mapping_table_ - SrcMap src_mapping_table_; + DefaultSrcMap src_mapping_table_; // The encoding mapping table data (dex -> pc offset and pc offset -> dex) with a size prefix. std::vector<uint8_t> encoded_mapping_table_; ArenaVector<uint32_t> core_vmap_table_; diff --git a/compiler/dex/quick/resource_mask.cc b/compiler/dex/quick/resource_mask.cc index 088bec870e..ca68f95411 100644 --- a/compiler/dex/quick/resource_mask.cc +++ b/compiler/dex/quick/resource_mask.cc @@ -19,6 +19,7 @@ #include "resource_mask.h" #include "utils/arena_allocator.h" +#include "utils.h" namespace art { diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 85ab92bc08..ba9c611e9b 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -21,6 +21,7 @@ #include "dex/reg_storage_eq.h" #include "mirror/art_method.h" #include "mirror/array-inl.h" +#include "utils.h" #include "x86_lir.h" namespace art { @@ -656,7 +657,7 @@ RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1); NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg()); OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg); - int shift_amount = LowestSetBit(imm); + int shift_amount = CTZ(imm); OpRegImm(kOpAsr, rl_result.reg, shift_amount); if (imm < 0) { OpReg(kOpNeg, rl_result.reg); @@ -1627,7 +1628,7 @@ bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64 GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1, flags); return true; } else if (IsPowerOfTwo(val)) { - int shift_amount = LowestSetBit(val); + int shift_amount = CTZ(val); if (!PartiallyIntersects(rl_src1, rl_dest)) { rl_src1 = LoadValueWide(rl_src1, kCoreReg); RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1, @@ -2070,7 +2071,7 @@ void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src, OpRegReg(kOpAdd, rl_result.reg, rl_src.reg); NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg()); OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg); - int shift_amount = LowestSetBit(imm); + int shift_amount = CTZ(imm); OpRegImm(kOpAsr, rl_result.reg, shift_amount); if (imm < 0) { OpReg(kOpNeg, rl_result.reg); @@ -2319,7 +2320,7 @@ void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> th */ void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale) { - RegisterClass reg_class = RegClassBySize(size); + RegisterClass reg_class = RegClassForFieldLoadStore(size, false); int len_offset = mirror::Array::LengthOffset().Int32Value(); RegLocation rl_result; rl_array = LoadValue(rl_array, kRefReg); @@ -2368,7 +2369,7 @@ void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, */ void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { - RegisterClass reg_class = RegClassBySize(size); + RegisterClass reg_class = RegClassForFieldLoadStore(size, false); int len_offset = mirror::Array::LengthOffset().Int32Value(); int data_offset; diff --git a/compiler/dex/quick/x86/target_x86.cc 
b/compiler/dex/quick/x86/target_x86.cc index 5f6cdda0d3..142acbc8a4 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -562,6 +562,7 @@ void X86Mir2Lir::FreeCallTemps() { FreeTemp(TargetReg32(kArg1)); FreeTemp(TargetReg32(kArg2)); FreeTemp(TargetReg32(kArg3)); + FreeTemp(TargetReg32(kHiddenArg)); if (cu_->target64) { FreeTemp(TargetReg32(kArg4)); FreeTemp(TargetReg32(kArg5)); @@ -797,6 +798,12 @@ bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) { } RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) { + // Prefer XMM registers. Fixes a problem with iget/iput to a FP when cached temporary + // with same VR is a Core register. + if (size == kSingle || size == kDouble) { + return kFPReg; + } + // X86_64 can handle any size. if (cu_->target64) { return RegClassBySize(size); @@ -2209,18 +2216,36 @@ void X86Mir2Lir::GenReduceVector(MIR* mir) { // Handle float case. // TODO Add support for fast math (not value safe) and do horizontal add in that case. + int extract_index = mir->dalvikInsn.arg[0]; + rl_result = EvalLoc(rl_dest, kFPReg, true); NewLIR2(kX86PxorRR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); - NewLIR2(kX86AddssRR, rl_result.reg.GetReg(), vector_src.GetReg()); - // Since FP must keep order of operation for value safety, we shift to low - // 32-bits and add to result. - for (int i = 0; i < 3; i++) { - NewLIR3(kX86ShufpsRRI, vector_src.GetReg(), vector_src.GetReg(), 0x39); + if (LIKELY(extract_index != 0)) { + // We know the index of element which we want to extract. We want to extract it and + // keep values in vector register correct for future use. So the way we act is: + // 1. Generate shuffle mask that allows to swap zeroth and required elements; + // 2. Shuffle vector register with this mask; + // 3. Extract zeroth element where required value lies; + // 4. Shuffle with same mask again to restore original values in vector register. + // The mask is generated from equivalence mask 0b11100100 swapping 0th and extracted + // element indices. + int shuffle[4] = {0b00, 0b01, 0b10, 0b11}; + shuffle[0] = extract_index; + shuffle[extract_index] = 0; + int mask = 0; + for (int i = 0; i < 4; i++) { + mask |= (shuffle[i] << (2 * i)); + } + NewLIR3(kX86ShufpsRRI, vector_src.GetReg(), vector_src.GetReg(), mask); + NewLIR2(kX86AddssRR, rl_result.reg.GetReg(), vector_src.GetReg()); + NewLIR3(kX86ShufpsRRI, vector_src.GetReg(), vector_src.GetReg(), mask); + } else { + // We need to extract zeroth element and don't need any complex stuff to do it. NewLIR2(kX86AddssRR, rl_result.reg.GetReg(), vector_src.GetReg()); } - StoreValue(rl_dest, rl_result); + StoreFinalValue(rl_dest, rl_result); } else if (opsize == kDouble) { // TODO Handle double case. LOG(FATAL) << "Unsupported add reduce for double."; diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc index 7cd431e26c..3905649ac6 100644 --- a/compiler/dex/ssa_transformation.cc +++ b/compiler/dex/ssa_transformation.cc @@ -103,7 +103,7 @@ void MIRGraph::ComputeDFSOrders() { num_reachable_blocks_ = dfs_order_.size(); - if (num_reachable_blocks_ != num_blocks_) { + if (num_reachable_blocks_ != GetNumBlocks()) { // Kill all unreachable blocks. 
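The float add-reduce change above builds a SHUFPS immediate that swaps lane 0 with the lane being extracted, extracts, then applies the same mask again to restore the vector. The sketch below shows how that immediate is derived from the identity selector 0b11100100 (each 2-bit field i selects source element i); it is a demonstration of the mask arithmetic only, not generated code.

#include <cstdio>

static int ShuffleMaskSwappingWithZero(int extract_index) {
  int shuffle[4] = {0, 1, 2, 3};     // identity selector: 0b11'10'01'00
  shuffle[0] = extract_index;        // lane 0 takes the wanted element
  shuffle[extract_index] = 0;        // the wanted lane takes element 0
  int mask = 0;
  for (int i = 0; i < 4; ++i) {
    mask |= shuffle[i] << (2 * i);   // 2 bits per destination lane
  }
  return mask;
}

int main() {
  // Swapping lanes 0 and 2 gives 0b11'00'01'10 = 0xC6; applying it twice restores the vector.
  printf("mask(2) = 0x%02X\n", ShuffleMaskSwappingWithZero(2));
  printf("mask(0) = 0x%02X\n", ShuffleMaskSwappingWithZero(0));  // identity, 0xE4
}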
AllNodesIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { @@ -173,9 +173,9 @@ void MIRGraph::ComputeDomPostOrderTraversal(BasicBlock* bb) { dom_post_order_traversal_.reserve(num_reachable_blocks_); ClearAllVisitedFlags(); - DCHECK(temp_scoped_alloc_.get() != nullptr); + ScopedArenaAllocator allocator(&cu_->arena_stack); ScopedArenaVector<std::pair<BasicBlock*, ArenaBitVector::IndexIterator>> work_stack( - temp_scoped_alloc_->Adapter()); + allocator.Adapter()); bb->visited = true; work_stack.push_back(std::make_pair(bb, bb->i_dominated->Indexes().begin())); while (!work_stack.empty()) { @@ -402,6 +402,8 @@ void MIRGraph::ComputeDominators() { for (BasicBlock* bb = iter5.Next(); bb != NULL; bb = iter5.Next()) { ComputeDominanceFrontier(bb); } + + domination_up_to_date_ = true; } /* diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index cbb23c26b9..9985d66469 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -62,6 +62,7 @@ #include "thread_pool.h" #include "trampolines/trampoline_compiler.h" #include "transaction.h" +#include "utils/swap_space.h" #include "verifier/method_verifier.h" #include "verifier/method_verifier-inl.h" @@ -339,8 +340,10 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options, bool image, std::set<std::string>* image_classes, std::set<std::string>* compiled_classes, size_t thread_count, bool dump_stats, bool dump_passes, CumulativeLogger* timer, - const std::string& profile_file) - : profile_present_(false), compiler_options_(compiler_options), + int swap_fd, const std::string& profile_file) + : swap_space_(swap_fd == -1 ? nullptr : new SwapSpace(swap_fd, 10 * MB)), + swap_space_allocator_(new SwapAllocator<void>(swap_space_.get())), + profile_present_(false), compiler_options_(compiler_options), verification_results_(verification_results), method_inliner_map_(method_inliner_map), compiler_(Compiler::Create(this, compiler_kind)), @@ -349,7 +352,7 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options, freezing_constructor_lock_("freezing constructor lock"), compiled_classes_lock_("compiled classes lock"), compiled_methods_lock_("compiled method lock"), - compiled_methods_(), + compiled_methods_(MethodTable::key_compare()), non_relative_linker_patch_count_(0u), image_(image), image_classes_(image_classes), @@ -361,12 +364,12 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options, timings_logger_(timer), compiler_context_(nullptr), support_boot_image_fixup_(instruction_set != kMips), - dedupe_code_("dedupe code"), - dedupe_src_mapping_table_("dedupe source mapping table"), - dedupe_mapping_table_("dedupe mapping table"), - dedupe_vmap_table_("dedupe vmap table"), - dedupe_gc_map_("dedupe gc map"), - dedupe_cfi_info_("dedupe cfi info") { + dedupe_code_("dedupe code", *swap_space_allocator_), + dedupe_src_mapping_table_("dedupe source mapping table", *swap_space_allocator_), + dedupe_mapping_table_("dedupe mapping table", *swap_space_allocator_), + dedupe_vmap_table_("dedupe vmap table", *swap_space_allocator_), + dedupe_gc_map_("dedupe gc map", *swap_space_allocator_), + dedupe_cfi_info_("dedupe cfi info", *swap_space_allocator_) { DCHECK(compiler_options_ != nullptr); DCHECK(verification_results_ != nullptr); DCHECK(method_inliner_map_ != nullptr); @@ -393,31 +396,28 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options, } } -std::vector<uint8_t>* CompilerDriver::DeduplicateCode(const 
std::vector<uint8_t>& code) { +SwapVector<uint8_t>* CompilerDriver::DeduplicateCode(const ArrayRef<const uint8_t>& code) { return dedupe_code_.Add(Thread::Current(), code); } -SrcMap* CompilerDriver::DeduplicateSrcMappingTable(const SrcMap& src_map) { +SwapSrcMap* CompilerDriver::DeduplicateSrcMappingTable(const ArrayRef<SrcMapElem>& src_map) { return dedupe_src_mapping_table_.Add(Thread::Current(), src_map); } -std::vector<uint8_t>* CompilerDriver::DeduplicateMappingTable(const std::vector<uint8_t>& code) { +SwapVector<uint8_t>* CompilerDriver::DeduplicateMappingTable(const ArrayRef<const uint8_t>& code) { return dedupe_mapping_table_.Add(Thread::Current(), code); } -std::vector<uint8_t>* CompilerDriver::DeduplicateVMapTable(const std::vector<uint8_t>& code) { +SwapVector<uint8_t>* CompilerDriver::DeduplicateVMapTable(const ArrayRef<const uint8_t>& code) { return dedupe_vmap_table_.Add(Thread::Current(), code); } -std::vector<uint8_t>* CompilerDriver::DeduplicateGCMap(const std::vector<uint8_t>& code) { +SwapVector<uint8_t>* CompilerDriver::DeduplicateGCMap(const ArrayRef<const uint8_t>& code) { return dedupe_gc_map_.Add(Thread::Current(), code); } -std::vector<uint8_t>* CompilerDriver::DeduplicateCFIInfo(const std::vector<uint8_t>* cfi_info) { - if (cfi_info == nullptr) { - return nullptr; - } - return dedupe_cfi_info_.Add(Thread::Current(), *cfi_info); +SwapVector<uint8_t>* CompilerDriver::DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info) { + return dedupe_cfi_info_.Add(Thread::Current(), cfi_info); } CompilerDriver::~CompilerDriver() { @@ -428,7 +428,9 @@ CompilerDriver::~CompilerDriver() { } { MutexLock mu(self, compiled_methods_lock_); - STLDeleteValues(&compiled_methods_); + for (auto& pair : compiled_methods_) { + CompiledMethod::ReleaseSwapAllocatedCompiledMethod(this, pair.second); + } } compiler_->UnInit(); } @@ -2337,6 +2339,14 @@ std::string CompilerDriver::GetMemoryUsageString() const { oss << " native alloc=" << PrettySize(allocated_space) << " free=" << PrettySize(free_space); #endif + if (swap_space_.get() != nullptr) { + oss << " swap=" << PrettySize(swap_space_->GetSize()); + } + oss << "\nCode dedupe: " << dedupe_code_.DumpStats(); + oss << "\nMapping table dedupe: " << dedupe_mapping_table_.DumpStats(); + oss << "\nVmap table dedupe: " << dedupe_vmap_table_.DumpStats(); + oss << "\nGC map dedupe: " << dedupe_gc_map_.DumpStats(); + oss << "\nCFI info dedupe: " << dedupe_cfi_info_.DumpStats(); return oss.str(); } diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index edc6468a85..7ddc32cdd8 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -39,6 +39,8 @@ #include "thread_pool.h" #include "utils/arena_allocator.h" #include "utils/dedupe_set.h" +#include "utils/swap_space.h" +#include "utils.h" #include "dex/verified_method.h" namespace art { @@ -77,6 +79,8 @@ enum DexToDexCompilationLevel { }; std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs); +static constexpr bool kUseMurmur3Hash = true; + class CompilerDriver { public: // Create a compiler targeting the requested "instruction_set". 
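The Deduplicate* helpers above intern identical code buffers, mapping tables, vmap tables, GC maps and CFI data, so many compiled methods can share one canonical copy; with the new swap space that canonical storage can be file-backed. A rough, plain-allocator sketch of the interning idea (the real DedupeSet is keyed by ArrayRef, hashed, sharded, and backed by the SwapAllocator):

#include <cstdint>
#include <cstdio>
#include <set>
#include <vector>

class DedupeTable {
 public:
  // Returns a stable pointer to a copy of `data`, reusing an existing copy
  // when identical content was added before.
  const std::vector<uint8_t>* Add(const std::vector<uint8_t>& data) {
    auto it = storage_.insert(data).first;  // no-op insert on duplicates
    return &*it;
  }
  size_t UniqueEntries() const { return storage_.size(); }

 private:
  std::set<std::vector<uint8_t>> storage_;  // set nodes give stable addresses
};

int main() {
  DedupeTable gc_maps;
  const auto* a = gc_maps.Add({1, 2, 3});
  const auto* b = gc_maps.Add({1, 2, 3});  // same content -> same pointer
  const auto* c = gc_maps.Add({4, 5});
  (void)c;
  printf("a == b: %d, unique entries: %zu\n", a == b, gc_maps.UniqueEntries());
}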
@@ -93,7 +97,8 @@ class CompilerDriver { bool image, std::set<std::string>* image_classes, std::set<std::string>* compiled_classes, size_t thread_count, bool dump_stats, bool dump_passes, - CumulativeLogger* timer, const std::string& profile_file); + CumulativeLogger* timer, int swap_fd, + const std::string& profile_file); ~CompilerDriver(); @@ -334,6 +339,9 @@ class CompilerDriver { const ArenaPool* GetArenaPool() const { return &arena_pool_; } + SwapAllocator<void>& GetSwapSpaceAllocator() { + return *swap_space_allocator_.get(); + } bool WriteElf(const std::string& android_root, bool is_host, @@ -376,15 +384,12 @@ class CompilerDriver { void RecordClassStatus(ClassReference ref, mirror::Class::Status status) LOCKS_EXCLUDED(compiled_classes_lock_); - std::vector<uint8_t>* DeduplicateCode(const std::vector<uint8_t>& code); - SrcMap* DeduplicateSrcMappingTable(const SrcMap& src_map); - std::vector<uint8_t>* DeduplicateMappingTable(const std::vector<uint8_t>& code); - std::vector<uint8_t>* DeduplicateVMapTable(const std::vector<uint8_t>& code); - std::vector<uint8_t>* DeduplicateGCMap(const std::vector<uint8_t>& code); - std::vector<uint8_t>* DeduplicateCFIInfo(const std::vector<uint8_t>* cfi_info); - - ProfileFile profile_file_; - bool profile_present_; + SwapVector<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code); + SwapSrcMap* DeduplicateSrcMappingTable(const ArrayRef<SrcMapElem>& src_map); + SwapVector<uint8_t>* DeduplicateMappingTable(const ArrayRef<const uint8_t>& code); + SwapVector<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& code); + SwapVector<uint8_t>* DeduplicateGCMap(const ArrayRef<const uint8_t>& code); + SwapVector<uint8_t>* DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info); // Should the compiler run on this method given profile information? bool SkipCompilation(const std::string& method_name); @@ -484,6 +489,14 @@ class CompilerDriver { static void CompileClass(const ParallelCompilationManager* context, size_t class_def_index) LOCKS_EXCLUDED(Locks::mutator_lock_); + // Swap pool and allocator used for native allocations. May be file-backed. Needs to be first + // as other fields rely on this. + std::unique_ptr<SwapSpace> swap_space_; + std::unique_ptr<SwapAllocator<void> > swap_space_allocator_; + + ProfileFile profile_file_; + bool profile_present_; + const CompilerOptions* const compiler_options_; VerificationResults* const verification_results_; DexFileToMethodInlinerMap* const method_inliner_map_; @@ -551,47 +564,92 @@ class CompilerDriver { bool support_boot_image_fixup_; // DeDuplication data structures, these own the corresponding byte arrays. - template <typename ByteArray> + template <typename ContentType> class DedupeHashFunc { public: - size_t operator()(const ByteArray& array) const { - // For small arrays compute a hash using every byte. 
- static const size_t kSmallArrayThreshold = 16; - size_t hash = 0x811c9dc5; - if (array.size() <= kSmallArrayThreshold) { - for (auto b : array) { - hash = (hash * 16777619) ^ static_cast<uint8_t>(b); + size_t operator()(const ArrayRef<ContentType>& array) const { + const uint8_t* data = reinterpret_cast<const uint8_t*>(array.data()); + static_assert(IsPowerOfTwo(sizeof(ContentType)), + "ContentType is not power of two, don't know whether array layout is as assumed"); + uint32_t len = sizeof(ContentType) * array.size(); + if (kUseMurmur3Hash) { + static constexpr uint32_t c1 = 0xcc9e2d51; + static constexpr uint32_t c2 = 0x1b873593; + static constexpr uint32_t r1 = 15; + static constexpr uint32_t r2 = 13; + static constexpr uint32_t m = 5; + static constexpr uint32_t n = 0xe6546b64; + + uint32_t hash = 0; + + const int nblocks = len / 4; + typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t; + const unaligned_uint32_t *blocks = reinterpret_cast<const uint32_t*>(data); + int i; + for (i = 0; i < nblocks; i++) { + uint32_t k = blocks[i]; + k *= c1; + k = (k << r1) | (k >> (32 - r1)); + k *= c2; + + hash ^= k; + hash = ((hash << r2) | (hash >> (32 - r2))) * m + n; } - } else { - // For larger arrays use the 2 bytes at 6 bytes (the location of a push registers - // instruction field for quick generated code on ARM) and then select a number of other - // values at random. - static const size_t kRandomHashCount = 16; - for (size_t i = 0; i < 2; ++i) { - uint8_t b = static_cast<uint8_t>(array[i + 6]); - hash = (hash * 16777619) ^ b; + + const uint8_t *tail = reinterpret_cast<const uint8_t*>(data + nblocks * 4); + uint32_t k1 = 0; + + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + FALLTHROUGH_INTENDED; + case 2: + k1 ^= tail[1] << 8; + FALLTHROUGH_INTENDED; + case 1: + k1 ^= tail[0]; + + k1 *= c1; + k1 = (k1 << r1) | (k1 >> (32 - r1)); + k1 *= c2; + hash ^= k1; } - for (size_t i = 2; i < kRandomHashCount; ++i) { - size_t r = i * 1103515245 + 12345; - uint8_t b = static_cast<uint8_t>(array[r % array.size()]); - hash = (hash * 16777619) ^ b; + + hash ^= len; + hash ^= (hash >> 16); + hash *= 0x85ebca6b; + hash ^= (hash >> 13); + hash *= 0xc2b2ae35; + hash ^= (hash >> 16); + + return hash; + } else { + size_t hash = 0x811c9dc5; + for (uint32_t i = 0; i < len; ++i) { + hash = (hash * 16777619) ^ data[i]; } + hash += hash << 13; + hash ^= hash >> 7; + hash += hash << 3; + hash ^= hash >> 17; + hash += hash << 5; + return hash; } - hash += hash << 13; - hash ^= hash >> 7; - hash += hash << 3; - hash ^= hash >> 17; - hash += hash << 5; - return hash; } }; - DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc<std::vector<uint8_t>>, 4> dedupe_code_; - DedupeSet<SrcMap, size_t, DedupeHashFunc<SrcMap>, 4> dedupe_src_mapping_table_; - DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc<std::vector<uint8_t>>, 4> dedupe_mapping_table_; - DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc<std::vector<uint8_t>>, 4> dedupe_vmap_table_; - DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc<std::vector<uint8_t>>, 4> dedupe_gc_map_; - DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc<std::vector<uint8_t>>, 4> dedupe_cfi_info_; + DedupeSet<ArrayRef<const uint8_t>, + SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_code_; + DedupeSet<ArrayRef<SrcMapElem>, + SwapSrcMap, size_t, DedupeHashFunc<SrcMapElem>, 4> dedupe_src_mapping_table_; + DedupeSet<ArrayRef<const uint8_t>, + SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_mapping_table_; 
+ DedupeSet<ArrayRef<const uint8_t>, + SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_vmap_table_; + DedupeSet<ArrayRef<const uint8_t>, + SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_gc_map_; + DedupeSet<ArrayRef<const uint8_t>, + SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_cfi_info_; DISALLOW_COPY_AND_ASSIGN(CompilerDriver); }; diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index d651c0fb84..9ec4f281cb 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -358,8 +358,8 @@ class LineTableGenerator FINAL : public Leb128Encoder { }; // TODO: rewriting it using DexFile::DecodeDebugInfo needs unneeded stuff. -static void GetLineInfoForJava(const uint8_t* dbgstream, const SrcMap& pc2dex, - SrcMap* result, uint32_t start_pc = 0) { +static void GetLineInfoForJava(const uint8_t* dbgstream, const SwapSrcMap& pc2dex, + DefaultSrcMap* result, uint32_t start_pc = 0) { if (dbgstream == nullptr) { return; } @@ -415,7 +415,7 @@ static void GetLineInfoForJava(const uint8_t* dbgstream, const SrcMap& pc2dex, dex_offset += adjopcode / DexFile::DBG_LINE_RANGE; java_line += DexFile::DBG_LINE_BASE + (adjopcode % DexFile::DBG_LINE_RANGE); - for (SrcMap::const_iterator found = pc2dex.FindByTo(dex_offset); + for (SwapSrcMap::const_iterator found = pc2dex.FindByTo(dex_offset); found != pc2dex.end() && found->to_ == static_cast<int32_t>(dex_offset); found++) { result->push_back({found->from_ + start_pc, static_cast<int32_t>(java_line)}); @@ -615,7 +615,7 @@ static void FillInCFIInformation(OatWriter* oat_writer, LineTableGenerator line_table_generator(LINE_BASE, LINE_RANGE, OPCODE_BASE, dbg_line, 0, 1); - SrcMap pc2java_map; + DefaultSrcMap pc2java_map; for (size_t i = 0; i < method_info.size(); ++i) { const OatWriter::DebugInfo &dbg = method_info[i]; const char* file_name = (dbg.src_file_name_ == nullptr) ? "null" : dbg.src_file_name_; @@ -700,7 +700,7 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, DCHECK(it->compiled_method_ != nullptr); // Copy in the FDE, if present - const std::vector<uint8_t>* fde = it->compiled_method_->GetCFIInfo(); + const SwapVector<uint8_t>* fde = it->compiled_method_->GetCFIInfo(); if (fde != nullptr) { // Copy the information into cfi_info and then fix the address in the new copy. 
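GetLineInfoForJava above walks the pc2dex source map with FindByTo(): for one dex pc it visits every native pc mapped to it and emits a line-table entry per hit. A simplified stand-in for that access pattern is below; it assumes the map is kept sorted by to_ and only mirrors the fields (from_, to_) visible in the diff, not the real SrcMap/SwapSrcMap classes.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct SrcMapElem {
  uint32_t from_;  // native pc offset
  int32_t to_;     // dex pc
};

using SrcMap = std::vector<SrcMapElem>;

// First entry whose to_ is not less than dex_pc; assumes the map is sorted by to_.
static SrcMap::const_iterator FindByTo(const SrcMap& map, int32_t dex_pc) {
  return std::lower_bound(map.begin(), map.end(), dex_pc,
                          [](const SrcMapElem& e, int32_t v) { return e.to_ < v; });
}

int main() {
  SrcMap pc2dex = {{0x00, 0}, {0x08, 4}, {0x0c, 4}, {0x14, 8}};
  for (auto it = FindByTo(pc2dex, 4); it != pc2dex.end() && it->to_ == 4; ++it) {
    printf("dex pc 4 -> native pc 0x%02x\n", it->from_);  // two hits: 0x08, 0x0c
  }
}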
int cur_offset = cfi_info->size(); diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index c3fe75b3f1..bf996a251f 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -430,13 +430,18 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, MemoryRegion code(&managed_code[0], managed_code.size()); __ FinalizeInstructions(code); jni_asm->FinalizeFrameDescriptionEntry(); - return new CompiledMethod(driver, - instruction_set, - managed_code, - frame_size, - main_jni_conv->CoreSpillMask(), - main_jni_conv->FpSpillMask(), - jni_asm->GetFrameDescriptionEntry()); + std::vector<uint8_t>* fde(jni_asm->GetFrameDescriptionEntry()); + ArrayRef<const uint8_t> cfi_ref; + if (fde != nullptr) { + cfi_ref = ArrayRef<const uint8_t>(*fde); + } + return CompiledMethod::SwapAllocCompiledMethodCFI(driver, + instruction_set, + ArrayRef<const uint8_t>(managed_code), + frame_size, + main_jni_conv->CoreSpillMask(), + main_jni_conv->FpSpillMask(), + cfi_ref); } // Copy a single parameter from the managed to the JNI calling convention diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 3ca0cdf011..d14153872b 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -59,7 +59,7 @@ class OatTest : public CommonCompilerTest { EXPECT_EQ(oat_method.GetFpSpillMask(), compiled_method->GetFpSpillMask()); uintptr_t oat_code_aligned = RoundDown(reinterpret_cast<uintptr_t>(quick_oat_code), 2); quick_oat_code = reinterpret_cast<const void*>(oat_code_aligned); - const std::vector<uint8_t>* quick_code = compiled_method->GetQuickCode(); + const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode(); EXPECT_TRUE(quick_code != nullptr); size_t code_size = quick_code->size() * sizeof(quick_code[0]); EXPECT_EQ(0, memcmp(quick_oat_code, &quick_code[0], code_size)) @@ -92,7 +92,7 @@ TEST_F(OatTest, WriteRead) { method_inliner_map_.get(), compiler_kind, insn_set, insn_features.get(), false, nullptr, nullptr, 2, true, - true, timer_.get(), "")); + true, timer_.get(), -1, "")); jobject class_loader = nullptr; if (kCompile) { TimingLogger timings2("OatTest::WriteRead", false, false); diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 7d14de1306..3c36ffa4e9 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -504,7 +504,7 @@ OatWriter::~OatWriter() { } struct OatWriter::GcMapDataAccess { - static const std::vector<uint8_t>* GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE { + static const SwapVector<uint8_t>* GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE { return compiled_method->GetGcMap(); } @@ -526,7 +526,7 @@ struct OatWriter::GcMapDataAccess { }; struct OatWriter::MappingTableDataAccess { - static const std::vector<uint8_t>* GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE { + static const SwapVector<uint8_t>* GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE { return compiled_method->GetMappingTable(); } @@ -548,7 +548,7 @@ struct OatWriter::MappingTableDataAccess { }; struct OatWriter::VmapTableDataAccess { - static const std::vector<uint8_t>* GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE { + static const SwapVector<uint8_t>* GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE { return &compiled_method->GetVmapTable(); } @@ -719,7 +719,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { // Derived from CompiledMethod. 
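The JNI compiler change above stops copying the optional FDE into the CompiledMethod and instead wraps it in a non-owning ArrayRef, leaving the ref empty when no CFI was produced. A minimal non-owning view in that spirit is sketched below; ART's real ArrayRef (utils/array_ref.h) is richer, so treat Span here purely as an illustration of the "wrap only if present" pattern.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

template <typename T>
class Span {
 public:
  Span() : data_(nullptr), size_(0) {}                                   // empty view
  explicit Span(const std::vector<T>& v) : data_(v.data()), size_(v.size()) {}
  const T* data() const { return data_; }
  size_t size() const { return size_; }
  bool empty() const { return size_ == 0; }

 private:
  const T* data_;   // not owned; the vector must outlive the view
  size_t size_;
};

int main() {
  std::vector<uint8_t>* fde = nullptr;  // e.g. no frame description entry was generated
  Span<uint8_t> cfi_ref;                // stays empty in that case
  if (fde != nullptr) {
    cfi_ref = Span<uint8_t>(*fde);
  }
  printf("cfi bytes: %zu\n", cfi_ref.size());
}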
uint32_t quick_code_offset = 0; - const std::vector<uint8_t>* quick_code = compiled_method->GetQuickCode(); + const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode(); CHECK(quick_code != nullptr); offset_ = writer_->relative_call_patcher_->ReserveSpace(offset_, compiled_method); offset_ = compiled_method->AlignCode(offset_); @@ -829,7 +829,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { } else { status = mirror::Class::kStatusNotReady; } - std::vector<uint8_t> const * gc_map = compiled_method->GetGcMap(); + const SwapVector<uint8_t>* gc_map = compiled_method->GetGcMap(); if (gc_map != nullptr) { size_t gc_map_size = gc_map->size() * sizeof(gc_map[0]); bool is_native = it.MemberIsNative(); @@ -871,7 +871,7 @@ class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor { DCHECK_LT(method_offsets_index_, oat_class->method_offsets_.size()); DCHECK_EQ(DataAccess::GetOffset(oat_class, method_offsets_index_), 0u); - const std::vector<uint8_t>* map = DataAccess::GetData(compiled_method); + const SwapVector<uint8_t>* map = DataAccess::GetData(compiled_method); uint32_t map_size = map == nullptr ? 0 : map->size() * sizeof((*map)[0]); if (map_size != 0u) { auto lb = dedupe_map_.lower_bound(map); @@ -893,7 +893,7 @@ class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor { private: // Deduplication is already done on a pointer basis by the compiler driver, // so we can simply compare the pointers to find out if things are duplicated. - SafeMap<const std::vector<uint8_t>*, uint32_t> dedupe_map_; + SafeMap<const SwapVector<uint8_t>*, uint32_t> dedupe_map_; }; class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { @@ -990,8 +990,11 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { size_t file_offset = file_offset_; OutputStream* out = out_; - const std::vector<uint8_t>* quick_code = compiled_method->GetQuickCode(); + const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode(); if (quick_code != nullptr) { + // Need a wrapper if we create a copy for patching. + ArrayRef<const uint8_t> wrapped(*quick_code); + offset_ = writer_->relative_call_patcher_->WriteThunks(out, offset_); if (offset_ == 0u) { ReportWriteFailure("relative call thunk", it); @@ -1030,8 +1033,8 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { DCHECK_OFFSET_(); if (!compiled_method->GetPatches().empty()) { - patched_code_ = *quick_code; - quick_code = &patched_code_; + patched_code_ = std::vector<uint8_t>(quick_code->begin(), quick_code->end()); + wrapped = ArrayRef<const uint8_t>(patched_code_); for (const LinkerPatch& patch : compiled_method->GetPatches()) { if (patch.Type() == kLinkerPatchCallRelative) { // NOTE: Relative calls across oat files are not supported. @@ -1052,8 +1055,8 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } } - writer_->oat_header_->UpdateChecksum(&(*quick_code)[0], code_size); - if (!out->WriteFully(&(*quick_code)[0], code_size)) { + writer_->oat_header_->UpdateChecksum(wrapped.data(), code_size); + if (!out->WriteFully(wrapped.data(), code_size)) { ReportWriteFailure("method code", it); return false; } @@ -1170,7 +1173,7 @@ class OatWriter::WriteMapMethodVisitor : public OatDexMethodVisitor { ++method_offsets_index_; // Write deduplicated map. - const std::vector<uint8_t>* map = DataAccess::GetData(compiled_method); + const SwapVector<uint8_t>* map = DataAccess::GetData(compiled_method); size_t map_size = map == nullptr ? 
0 : map->size() * sizeof((*map)[0]); DCHECK((map_size == 0u && map_offset == 0u) || (map_size != 0u && map_offset != 0u && map_offset <= offset_)) diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index f9054e0133..f6ca6c740e 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -670,10 +670,6 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction, MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedField); return false; } - if (resolved_field->IsVolatile()) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile); - return false; - } Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType(); @@ -689,20 +685,20 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction, null_check, value, field_type, - resolved_field->GetOffset())); + resolved_field->GetOffset(), + resolved_field->IsVolatile())); } else { current_block_->AddInstruction(new (arena_) HInstanceFieldGet( current_block_->GetLastInstruction(), field_type, - resolved_field->GetOffset())); + resolved_field->GetOffset(), + resolved_field->IsVolatile())); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } return true; } - - bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put) { @@ -723,11 +719,6 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, return false; } - if (resolved_field->IsVolatile()) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile); - return false; - } - Handle<mirror::Class> referrer_class(hs.NewHandle(compiler_driver_->ResolveCompilingMethodsClass( soa, dex_cache, class_loader, outer_compilation_unit_))); @@ -763,10 +754,12 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, HInstruction* value = LoadLocal(source_or_dest_reg, field_type); DCHECK_EQ(value->GetType(), field_type); current_block_->AddInstruction( - new (arena_) HStaticFieldSet(cls, value, field_type, resolved_field->GetOffset())); + new (arena_) HStaticFieldSet(cls, value, field_type, resolved_field->GetOffset(), + resolved_field->IsVolatile())); } else { current_block_->AddInstruction( - new (arena_) HStaticFieldGet(cls, field_type, resolved_field->GetOffset())); + new (arena_) HStaticFieldGet(cls, field_type, resolved_field->GetOffset(), + resolved_field->IsVolatile())); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } return true; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 6f424ce11d..4d8154e6a0 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -327,11 +327,13 @@ bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) con CodeGenerator* CodeGenerator::Create(ArenaAllocator* allocator, HGraph* graph, - InstructionSet instruction_set) { + InstructionSet instruction_set, + const InstructionSetFeatures& isa_features) { switch (instruction_set) { case kArm: case kThumb2: { - return new (allocator) arm::CodeGeneratorARM(graph); + return new (allocator) arm::CodeGeneratorARM(graph, + isa_features.AsArmInstructionSetFeatures()); } case kArm64: { return new (allocator) arm64::CodeGeneratorARM64(graph); @@ -374,7 +376,7 @@ void CodeGenerator::BuildNativeGCMap( } } -void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, SrcMap* src_map) const { +void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* src_map) const 
{ uint32_t pc2dex_data_size = 0u; uint32_t pc2dex_entries = pc_infos_.Size(); uint32_t pc2dex_offset = 0u; diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 1d42c47d56..4205ebebf9 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ #include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" #include "base/bit_field.h" #include "globals.h" #include "locations.h" @@ -44,7 +45,10 @@ class Assembler; class CodeGenerator; class DexCompilationUnit; class ParallelMoveResolver; +class SrcMapElem; +template <class Alloc> class SrcMap; +using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>; class CodeAllocator { public: @@ -81,7 +85,8 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> { void CompileOptimized(CodeAllocator* allocator); static CodeGenerator* Create(ArenaAllocator* allocator, HGraph* graph, - InstructionSet instruction_set); + InstructionSet instruction_set, + const InstructionSetFeatures& isa_features); HGraph* GetGraph() const { return graph_; } @@ -146,7 +151,7 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> { void GenerateSlowPaths(); - void BuildMappingTable(std::vector<uint8_t>* vector, SrcMap* src_map) const; + void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const; void BuildVMapTable(std::vector<uint8_t>* vector) const; void BuildNativeGCMap( std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 002d9d4449..3b3fb64763 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -16,6 +16,7 @@ #include "code_generator_arm.h" +#include "arch/arm/instruction_set_features_arm.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "mirror/array-inl.h" @@ -255,8 +256,8 @@ class LoadStringSlowPathARM : public SlowPathCodeARM { codegen->SaveLiveRegisters(locations); InvokeRuntimeCallingConvention calling_convention; - arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(0)); - __ LoadImmediate(calling_convention.GetRegisterAt(1), instruction_->GetStringIndex()); + arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); + __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc()); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); @@ -372,13 +373,15 @@ size_t CodeGeneratorARM::RestoreCoreRegister(size_t stack_index, uint32_t reg_id return kArmWordSize; } -CodeGeneratorARM::CodeGeneratorARM(HGraph* graph) +CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, + const ArmInstructionSetFeatures* isa_features) : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters, kNumberOfRegisterPairs), block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - assembler_(true) {} + assembler_(true), + isa_features_(isa_features) {} size_t CodeGeneratorARM::FrameEntrySpillSize() const { return kNumberOfPushedRegistersAtEntry * kArmWordSize; @@ -2409,14 +2412,14 @@ void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, 
LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->SetOut(Location::RegisterLocation(R0)); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); + codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(2)); __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pAllocArrayWithAccessCheck), instruction, instruction->GetDexPc()); @@ -2556,68 +2559,170 @@ void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unreachable"; } -void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) { + // TODO (ported from quick): revisit Arm barrier kinds + DmbOptions flavour = DmbOptions::ISH; // quiet c++ warnings + switch (kind) { + case MemBarrierKind::kAnyStore: + case MemBarrierKind::kLoadAny: + case MemBarrierKind::kAnyAny: { + flavour = DmbOptions::ISH; + break; + } + case MemBarrierKind::kStoreStore: { + flavour = DmbOptions::ISHST; + break; + } + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + } + __ dmb(flavour); +} + +void InstructionCodeGeneratorARM::GenerateWideAtomicLoad(Register addr, + uint32_t offset, + Register out_lo, + Register out_hi) { + if (offset != 0) { + __ LoadImmediate(out_lo, offset); + __ add(addr, addr, ShifterOperand(out_lo)); + } + __ ldrexd(out_lo, out_hi, addr); +} + +void InstructionCodeGeneratorARM::GenerateWideAtomicStore(Register addr, + uint32_t offset, + Register value_lo, + Register value_hi, + Register temp1, + Register temp2) { + Label fail; + if (offset != 0) { + __ LoadImmediate(temp1, offset); + __ add(addr, addr, ShifterOperand(temp1)); + } + __ Bind(&fail); + // We need a load followed by store. (The address used in a STREX instruction must + // be the same as the address in the most recently executed LDREX instruction.) + __ ldrexd(temp1, temp2, addr); + __ strexd(temp1, value_lo, value_hi, addr); + __ cmp(temp1, ShifterOperand(0)); + __ b(&fail, NE); +} + +void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(instruction->GetFieldType(), instruction->GetValue()); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); + + + Primitive::Type field_type = field_info.GetFieldType(); + bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble; + bool generate_volatile = field_info.IsVolatile() + && is_wide + && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); // Temporary registers for the write barrier. 
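GenerateWideAtomicStore above implements the classic ARMv7 exclusive-access retry loop: strexd only succeeds while the exclusive monitor set by the preceding ldrexd is still held, so the pair is repeated until the store lands. As a rough standalone analogy (a sketch, not ART code), this is the same loop compilers typically emit for a 64-bit std::atomic store on ARMv7 targets:

#include <atomic>
#include <cstdint>

// Sketch only. On ARMv7 this store is typically lowered to a loop of the form
//   retry: ldrexd r2, r3, [addr]        // claim exclusive access to the doubleword
//          strexd r1, lo, hi, [addr]    // try the store; r1 == 0 on success
//          cmp    r1, #0
//          bne    retry                 // exclusivity lost, try again
void StoreWideAtomic(std::atomic<uint64_t>* addr, uint64_t value) {
  addr->store(value, std::memory_order_relaxed);
}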
- if (needs_write_barrier) { + // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark. + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } else if (generate_volatile) { + // Arm encoding have some additional constraints for ldrexd/strexd: + // - registers need to be consecutive + // - the first register should be even but not R14. + // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever + // enable Arm encoding. + DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); + locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + if (field_type == Primitive::kPrimDouble) { + // For doubles we need two more registers to copy the value. + locations->AddTemp(Location::RegisterLocation(R2)); + locations->AddTemp(Location::RegisterLocation(R3)); + } } } -void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location value = locations->InAt(1); + + bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreByte, value, obj, offset); + __ StoreToOffset(kStoreByte, value.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreHalfword, value, obj, offset); + __ StoreToOffset(kStoreHalfword, value.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreWord, value, obj, offset); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { + Register value_reg = value.AsRegister<Register>(); + __ StoreToOffset(kStoreWord, value_reg, base, offset); + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { Register temp = locations->GetTemp(0).AsRegister<Register>(); Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, obj, value); + codegen_->MarkGCCard(temp, card, base, value_reg); } break; } case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), obj, offset); + if (is_volatile && !atomic_ldrd_strd) { + GenerateWideAtomicStore(base, offset, + value.AsRegisterPairLow<Register>(), + value.AsRegisterPairHigh<Register>(), + locations->GetTemp(0).AsRegister<Register>(), 
+ locations->GetTemp(1).AsRegister<Register>()); + } else { + __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), base, offset); + } break; } case Primitive::kPrimFloat: { - SRegister value = locations->InAt(1).AsFpuRegister<SRegister>(); - __ StoreSToOffset(value, obj, offset); + __ StoreSToOffset(value.AsFpuRegister<SRegister>(), base, offset); break; } case Primitive::kPrimDouble: { - DRegister value = FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>()); - __ StoreDToOffset(value, obj, offset); + DRegister value_reg = FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()); + if (is_volatile && !atomic_ldrd_strd) { + Register value_reg_lo = locations->GetTemp(0).AsRegister<Register>(); + Register value_reg_hi = locations->GetTemp(1).AsRegister<Register>(); + + __ vmovrrd(value_reg_lo, value_reg_hi, value_reg); + + GenerateWideAtomicStore(base, offset, + value_reg_lo, + value_reg_hi, + locations->GetTemp(2).AsRegister<Register>(), + locations->GetTemp(3).AsRegister<Register>()); + } else { + __ StoreDToOffset(value_reg, base, offset); + } break; } @@ -2625,75 +2730,142 @@ void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instr LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } } -void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + bool generate_volatile = field_info.IsVolatile() + && (field_info.GetFieldType() == Primitive::kPrimDouble) + && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); + if (generate_volatile) { + // Arm encoding have some additional constraints for ldrexd/strexd: + // - registers need to be consecutive + // - the first register should be even but not R14. + // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever + // enable Arm encoding. 
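The barriers HandleFieldSet emits around a volatile store (kAnyStore before, kAnyAny after), together with the kLoadAny barrier HandleFieldGet emits after a volatile load (below), are what give volatile fields their Java memory model ordering. A standalone way to picture the guarantee, using C++ std::atomic as a stand-in (illustration only, not ART code):

#include <atomic>

std::atomic<int> ready{0};  // stands in for a volatile Java field
int payload = 0;            // plain (non-volatile) field

void Writer() {
  payload = 42;
  ready.store(1, std::memory_order_seq_cst);   // volatile store: barrier before and after
}

void Reader() {
  if (ready.load(std::memory_order_seq_cst)) { // volatile load: barrier after
    // The barriers guarantee this thread observes payload == 42.
  }
}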
+ DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } } -void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location out = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (instruction->GetType()) { + switch (field_type) { case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedByte, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedByte, out, obj, offset); + __ LoadFromOffset(kLoadSignedByte, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadSignedHalfword, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedHalfword, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadWord, out, obj, offset); + __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset); break; } case Primitive::kPrimLong: { - // TODO: support volatile. 
- Location out = locations->Out(); - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset); + if (is_volatile && !atomic_ldrd_strd) { + GenerateWideAtomicLoad(base, offset, + out.AsRegisterPairLow<Register>(), + out.AsRegisterPairHigh<Register>()); + } else { + __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), base, offset); + } break; } case Primitive::kPrimFloat: { - SRegister out = locations->Out().AsFpuRegister<SRegister>(); - __ LoadSFromOffset(out, obj, offset); + __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), base, offset); break; } case Primitive::kPrimDouble: { - DRegister out = FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()); - __ LoadDFromOffset(out, obj, offset); + DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()); + if (is_volatile && !atomic_ldrd_strd) { + Register lo = locations->GetTemp(0).AsRegister<Register>(); + Register hi = locations->GetTemp(1).AsRegister<Register>(); + GenerateWideAtomicLoad(base, offset, lo, hi); + __ vmovdrr(out_reg, lo, hi); + } else { + __ LoadDFromOffset(out_reg, base, offset); + } break; } case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } +} + +void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) { @@ -3206,146 +3378,6 @@ void InstructionCodeGeneratorARM::GenerateClassInitializationCheck( __ Bind(slow_path->GetExitLabel()); } -void LocationsBuilderARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedByte, out, 
cls, offset); - break; - } - - case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedByte, out, cls, offset); - break; - } - - case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadSignedHalfword, out, cls, offset); - break; - } - - case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadUnsignedHalfword, out, cls, offset); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadWord, out, cls, offset); - break; - } - - case Primitive::kPrimLong: { - // TODO: support volatile. - Location out = locations->Out(); - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), cls, offset); - break; - } - - case Primitive::kPrimFloat: { - SRegister out = locations->Out().AsFpuRegister<SRegister>(); - __ LoadSFromOffset(out, cls, offset); - break; - } - - case Primitive::kPrimDouble: { - DRegister out = FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()); - __ LoadDFromOffset(out, cls, offset); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(instruction->GetFieldType(), instruction->GetValue()); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Temporary registers for the write barrier. 
- if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); - - switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreByte, value, cls, offset); - break; - } - - case Primitive::kPrimShort: - case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreHalfword, value, cls, offset); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(kStoreWord, value, cls, offset); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, cls, value); - } - break; - } - - case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), cls, offset); - break; - } - - case Primitive::kPrimFloat: { - SRegister value = locations->InAt(1).AsFpuRegister<SRegister>(); - __ StoreSToOffset(value, cls, offset); - break; - } - - case Primitive::kPrimDouble: { - DRegister value = FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>()); - __ StoreDToOffset(value, cls, offset); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; - UNREACHABLE(); - } -} - void LocationsBuilderARM::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 226e635d05..40f4edc4eb 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_ #include "code_generator.h" +#include "dex/compiler_enums.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm/assembler_thumb2.h" @@ -110,6 +111,8 @@ class LocationsBuilderARM : public HGraphVisitor { void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); CodeGeneratorARM* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -138,6 +141,15 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeARM* slow_path, Register class_reg); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleShift(HBinaryOperation* operation); + void GenerateMemoryBarrier(MemBarrierKind kind); + void GenerateWideAtomicStore(Register addr, uint32_t offset, + Register value_lo, Register value_hi, + Register temp1, Register temp2); + void GenerateWideAtomicLoad(Register addr, uint32_t 
offset, + Register out_lo, Register out_hi); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + ArmAssembler* const assembler_; CodeGeneratorARM* const codegen_; @@ -147,7 +159,7 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { class CodeGeneratorARM : public CodeGenerator { public: - explicit CodeGeneratorARM(HGraph* graph); + CodeGeneratorARM(HGraph* graph, const ArmInstructionSetFeatures* isa_features); virtual ~CodeGeneratorARM() {} void GenerateFrameEntry() OVERRIDE; @@ -221,6 +233,10 @@ class CodeGeneratorARM : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } + const ArmInstructionSetFeatures* GetInstructionSetFeatures() const { + return isa_features_; + } + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -228,6 +244,7 @@ class CodeGeneratorARM : public CodeGenerator { InstructionCodeGeneratorARM instruction_visitor_; ParallelMoveResolverARM move_resolver_; Thumb2Assembler assembler_; + const ArmInstructionSetFeatures* isa_features_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM); }; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index c7517d3abc..7b19f44e78 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -17,6 +17,7 @@ #include "code_generator_arm64.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" #include "mirror/array-inl.h" #include "mirror/art_method.h" @@ -38,6 +39,9 @@ namespace art { namespace arm64 { +// TODO: Tune the use of Load-Acquire, Store-Release vs Data Memory Barriers. +// For now we prefer the use of load-acquire, store-release over explicit memory barriers. 
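kUseAcquireRelease, introduced just below, selects between the two strategies the TODO mentions. For intuition (a sketch assuming a standalone std::atomic field, not ART code): acquire loads and release stores in C++ are what typically lower to ldar/stlr on ARMv8, which is exactly what the LoadAcquire/StoreRelease helpers further down emit directly instead of a plain access bracketed by dmb instructions.

#include <atomic>
#include <cstdint>

uint32_t LoadFieldAcquire(const std::atomic<uint32_t>* field) {
  return field->load(std::memory_order_acquire);    // typically: ldar w0, [x0]
}

void StoreFieldRelease(std::atomic<uint32_t>* field, uint32_t value) {
  field->store(value, std::memory_order_release);   // typically: stlr w1, [x0]
}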
+static constexpr bool kUseAcquireRelease = true; static constexpr bool kExplicitStackOverflowCheck = false; static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>); static constexpr int kCurrentMethodStackOffset = 0; @@ -233,8 +237,9 @@ Location ARM64ReturnLocation(Primitive::Type return_type) { static const Register kRuntimeParameterCoreRegisters[] = { x0, x1, x2, x3, x4, x5, x6, x7 }; static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters); -static const FPRegister kRuntimeParameterFpuRegisters[] = { }; -static constexpr size_t kRuntimeParameterFpuRegistersLength = 0; +static const FPRegister kRuntimeParameterFpuRegisters[] = { d0, d1, d2, d3, d4, d5, d6, d7 }; +static constexpr size_t kRuntimeParameterFpuRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); class InvokeRuntimeCallingConvention : public CallingConvention<Register, FPRegister> { public: @@ -294,6 +299,7 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { length_location_, LocationFrom(calling_convention.GetRegisterAt(1))); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc()); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } private: @@ -313,6 +319,7 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { __ Bind(GetEntryLabel()); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc()); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } private: @@ -343,6 +350,11 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType); arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t, mirror::ArtMethod*>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t, mirror::ArtMethod*>(); + } // Move the class to the desired location. 
Location out = locations->Out(); @@ -386,10 +398,11 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { codegen->SaveLiveRegisters(locations); InvokeRuntimeCallingConvention calling_convention; - arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(0).W()); - __ Mov(calling_convention.GetRegisterAt(1).W(), instruction_->GetStringIndex()); + arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W()); + __ Mov(calling_convention.GetRegisterAt(0).W(), instruction_->GetStringIndex()); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t, mirror::ArtMethod*>(); Primitive::Type type = instruction_->GetType(); arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type); @@ -412,6 +425,7 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { __ Bind(GetEntryLabel()); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc()); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } private: @@ -428,6 +442,7 @@ class StackOverflowCheckSlowPathARM64 : public SlowPathCodeARM64 { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowStackOverflow), nullptr, 0); + CheckEntrypointTypes<kQuickThrowStackOverflow, void, void>(); } private: @@ -446,6 +461,7 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { codegen->SaveLiveRegisters(instruction_->GetLocations()); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc()); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); codegen->RestoreLiveRegisters(instruction_->GetLocations()); if (successor_ == nullptr) { __ B(GetReturnLabel()); @@ -502,9 +518,12 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t, + const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_); + CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } codegen->RestoreLiveRegisters(locations); @@ -949,8 +968,8 @@ void CodeGeneratorARM64::SwapLocations(Location loc1, Location loc2) { } void CodeGeneratorARM64::Load(Primitive::Type type, - vixl::CPURegister dst, - const vixl::MemOperand& src) { + CPURegister dst, + const MemOperand& src) { switch (type) { case Primitive::kPrimBoolean: __ Ldrb(Register(dst), src); @@ -969,7 +988,7 @@ void CodeGeneratorARM64::Load(Primitive::Type type, case Primitive::kPrimLong: case Primitive::kPrimFloat: case Primitive::kPrimDouble: - DCHECK(dst.Is64Bits() == Is64BitType(type)); + DCHECK_EQ(dst.Is64Bits(), Is64BitType(type)); __ Ldr(dst, src); break; case Primitive::kPrimVoid: @@ -977,31 +996,123 @@ void CodeGeneratorARM64::Load(Primitive::Type type, } } +void CodeGeneratorARM64::LoadAcquire(Primitive::Type type, + CPURegister dst, + const MemOperand& src) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp_base = temps.AcquireX(); + + DCHECK(!src.IsRegisterOffset()); + DCHECK(!src.IsPreIndex()); + DCHECK(!src.IsPostIndex()); + + 
// TODO(vixl): Let the MacroAssembler handle MemOperand. + __ Add(temp_base, src.base(), src.offset()); + MemOperand base = MemOperand(temp_base); + switch (type) { + case Primitive::kPrimBoolean: + __ Ldarb(Register(dst), base); + break; + case Primitive::kPrimByte: + __ Ldarb(Register(dst), base); + __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); + break; + case Primitive::kPrimChar: + __ Ldarh(Register(dst), base); + break; + case Primitive::kPrimShort: + __ Ldarh(Register(dst), base); + __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); + break; + case Primitive::kPrimInt: + case Primitive::kPrimNot: + case Primitive::kPrimLong: + DCHECK_EQ(dst.Is64Bits(), Is64BitType(type)); + __ Ldar(Register(dst), base); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + DCHECK(dst.IsFPRegister()); + DCHECK_EQ(dst.Is64Bits(), Is64BitType(type)); + + Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); + __ Ldar(temp, base); + __ Fmov(FPRegister(dst), temp); + break; + } + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << type; + } +} + void CodeGeneratorARM64::Store(Primitive::Type type, - vixl::CPURegister rt, - const vixl::MemOperand& dst) { + CPURegister src, + const MemOperand& dst) { switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - __ Strb(Register(rt), dst); + __ Strb(Register(src), dst); break; case Primitive::kPrimChar: case Primitive::kPrimShort: - __ Strh(Register(rt), dst); + __ Strh(Register(src), dst); break; case Primitive::kPrimInt: case Primitive::kPrimNot: case Primitive::kPrimLong: case Primitive::kPrimFloat: case Primitive::kPrimDouble: - DCHECK(rt.Is64Bits() == Is64BitType(type)); - __ Str(rt, dst); + DCHECK_EQ(src.Is64Bits(), Is64BitType(type)); + __ Str(src, dst); break; case Primitive::kPrimVoid: LOG(FATAL) << "Unreachable type " << type; } } +void CodeGeneratorARM64::StoreRelease(Primitive::Type type, + CPURegister src, + const MemOperand& dst) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp_base = temps.AcquireX(); + + DCHECK(!dst.IsRegisterOffset()); + DCHECK(!dst.IsPreIndex()); + DCHECK(!dst.IsPostIndex()); + + // TODO(vixl): Let the MacroAssembler handle this. + __ Add(temp_base, dst.base(), dst.offset()); + MemOperand base = MemOperand(temp_base); + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + __ Stlrb(Register(src), base); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + __ Stlrh(Register(src), base); + break; + case Primitive::kPrimInt: + case Primitive::kPrimNot: + case Primitive::kPrimLong: + DCHECK_EQ(src.Is64Bits(), Is64BitType(type)); + __ Stlr(Register(src), base); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + DCHECK(src.IsFPRegister()); + DCHECK_EQ(src.Is64Bits(), Is64BitType(type)); + + Register temp = src.Is64Bits() ? 
temps.AcquireX() : temps.AcquireW(); + __ Fmov(temp, FPRegister(src)); + __ Stlr(temp, base); + break; + } + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << type; + } +} + void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) { DCHECK(current_method.IsW()); __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset)); @@ -1026,14 +1137,47 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod vixl::Register class_reg) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireW(); - __ Ldr(temp, HeapOperand(class_reg, mirror::Class::StatusOffset())); - __ Cmp(temp, mirror::Class::kStatusInitialized); - __ B(lt, slow_path->GetEntryLabel()); + size_t status_offset = mirror::Class::StatusOffset().SizeValue(); + // Even if the initialized flag is set, we need to ensure consistent memory ordering. - __ Dmb(InnerShareable, BarrierReads); + if (kUseAcquireRelease) { + // TODO(vixl): Let the MacroAssembler handle MemOperand. + __ Add(temp, class_reg, status_offset); + __ Ldar(temp, HeapOperand(temp)); + __ Cmp(temp, mirror::Class::kStatusInitialized); + __ B(lt, slow_path->GetEntryLabel()); + } else { + __ Ldr(temp, HeapOperand(class_reg, status_offset)); + __ Cmp(temp, mirror::Class::kStatusInitialized); + __ B(lt, slow_path->GetEntryLabel()); + __ Dmb(InnerShareable, BarrierReads); + } __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { + BarrierType type = BarrierAll; + + switch (kind) { + case MemBarrierKind::kAnyAny: + case MemBarrierKind::kAnyStore: { + type = BarrierAll; + break; + } + case MemBarrierKind::kLoadAny: { + type = BarrierReads; + break; + } + case MemBarrierKind::kStoreStore: { + type = BarrierWrites; + break; + } + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + } + __ Dmb(InnerShareable, type); +} + void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathARM64* slow_path = @@ -1288,7 +1432,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { Primitive::Type value_type = instruction->GetComponentType(); if (value_type == Primitive::kPrimNot) { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc()); - + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); } else { LocationSummary* locations = instruction->GetLocations(); Register obj = InputRegisterAt(instruction, 0); @@ -1660,28 +1804,54 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { } void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset()); - codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); + + if (instruction->IsVolatile()) { + if (kUseAcquireRelease) { + codegen_->LoadAcquire(instruction->GetType(), OutputCPURegister(instruction), field); + } else { + 
codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); + // For IRIW sequential consistency kLoadAny is not sufficient. + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); + } } void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); } void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - Primitive::Type field_type = instruction->GetFieldType(); - CPURegister value = InputCPURegisterAt(instruction, 1); Register obj = InputRegisterAt(instruction, 0); - codegen_->Store(field_type, value, HeapOperand(obj, instruction->GetFieldOffset())); - if (field_type == Primitive::kPrimNot) { + CPURegister value = InputCPURegisterAt(instruction, 1); + Offset offset = instruction->GetFieldOffset(); + Primitive::Type field_type = instruction->GetFieldType(); + + if (instruction->IsVolatile()) { + if (kUseAcquireRelease) { + codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset)); + } else { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->Store(field_type, value, HeapOperand(obj, offset)); + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + codegen_->Store(field_type, value, HeapOperand(obj, offset)); + } + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { codegen_->MarkGCCard(obj, Register(value)); } } @@ -1959,6 +2129,7 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins ? 
QUICK_ENTRY_POINT(pLockObject) : QUICK_ENTRY_POINT(pUnlockObject), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); } void LocationsBuilderARM64::VisitMul(HMul* mul) { @@ -2044,9 +2215,11 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); - locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); locations->SetOut(LocationFrom(x0)); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, + void*, uint32_t, int32_t, mirror::ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { @@ -2055,11 +2228,13 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt); DCHECK(type_index.Is(w0)); Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimNot); - DCHECK(current_method.Is(w1)); + DCHECK(current_method.Is(w2)); codegen_->LoadCurrentMethod(current_method); __ Mov(type_index, instruction->GetTypeIndex()); codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pAllocArrayWithAccessCheck), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, + void*, uint32_t, int32_t, mirror::ArtMethod*>(); } void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { @@ -2069,6 +2244,7 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) { @@ -2081,6 +2257,7 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) __ Mov(type_index, instruction->GetTypeIndex()); codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pAllocObjectWithAccessCheck), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*>(); } void LocationsBuilderARM64::VisitNot(HNot* instruction) { @@ -2175,9 +2352,12 @@ void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction) { } void LocationsBuilderARM64::VisitRem(HRem* rem) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall); - switch (rem->GetResultType()) { + Primitive::Type type = rem->GetResultType(); + LocationSummary::CallKind call_kind = IsFPType(type) ? 
LocationSummary::kCall + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + + switch (type) { case Primitive::kPrimInt: case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); @@ -2185,13 +2365,24 @@ void LocationsBuilderARM64::VisitRem(HRem* rem) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(calling_convention.GetReturnLocation(type)); + + break; + } + default: - LOG(FATAL) << "Unexpected rem type " << rem->GetResultType(); + LOG(FATAL) << "Unexpected rem type " << type; } } void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); + switch (type) { case Primitive::kPrimInt: case Primitive::kPrimLong: { @@ -2206,6 +2397,14 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { break; } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf) + : QUICK_ENTRY_POINT(pFmod); + codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc()); + break; + } + default: LOG(FATAL) << "Unexpected rem type " << type; } @@ -2294,7 +2493,18 @@ void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset()); - codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); + + if (instruction->IsVolatile()) { + if (kUseAcquireRelease) { + codegen_->LoadAcquire(instruction->GetType(), OutputCPURegister(instruction), field); + } else { + codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); + // For IRIW sequential consistency kLoadAny is not sufficient. 
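The IRIW remark above refers to "independent reads of independent writes": two threads write two different volatile fields, two other threads read them in opposite orders, and sequential consistency requires everyone to agree on a single order of the writes. An acquire-only (kLoadAny) barrier after the load does not forbid the disagreeing outcome, which is why the non-acquire/release path issues a full kAnyAny barrier instead. A sketch of the litmus test in C++ terms (illustration only, not ART code):

#include <atomic>

std::atomic<int> x{0}, y{0};  // stand-ins for two volatile Java fields

void WriterX() { x.store(1, std::memory_order_seq_cst); }
void WriterY() { y.store(1, std::memory_order_seq_cst); }

// With sequentially consistent accesses the outcome
//   r1 == 1 && r2 == 0 && r3 == 1 && r4 == 0
// (the two readers observing the writes in opposite orders) is forbidden.
// With acquire-only loads it would be allowed, hence the full barrier above.
void Reader1(int* r1, int* r2) {
  *r1 = x.load(std::memory_order_seq_cst);
  *r2 = y.load(std::memory_order_seq_cst);
}
void Reader2(int* r3, int* r4) {
  *r3 = y.load(std::memory_order_seq_cst);
  *r4 = x.load(std::memory_order_seq_cst);
}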
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); + } } void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { @@ -2305,13 +2515,24 @@ void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { } void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - CPURegister value = InputCPURegisterAt(instruction, 1); Register cls = InputRegisterAt(instruction, 0); + CPURegister value = InputCPURegisterAt(instruction, 1); Offset offset = instruction->GetFieldOffset(); Primitive::Type field_type = instruction->GetFieldType(); - codegen_->Store(field_type, value, HeapOperand(cls, offset)); - if (field_type == Primitive::kPrimNot) { + if (instruction->IsVolatile()) { + if (kUseAcquireRelease) { + codegen_->StoreRelease(field_type, value, HeapOperand(cls, offset)); + } else { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->Store(field_type, value, HeapOperand(cls, offset)); + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + codegen_->Store(field_type, value, HeapOperand(cls, offset)); + } + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { codegen_->MarkGCCard(cls, Register(value)); } } @@ -2353,6 +2574,7 @@ void LocationsBuilderARM64::VisitThrow(HThrow* instruction) { void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) { codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 1d5bfb734e..19488a4ba2 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_ #include "code_generator.h" +#include "dex/compiler_enums.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm64/assembler_arm64.h" @@ -108,6 +109,7 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { private: void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg); + void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); void HandleShift(HBinaryOperation* instr); @@ -257,6 +259,8 @@ class CodeGeneratorARM64 : public CodeGenerator { void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); void LoadCurrentMethod(vixl::Register current_method); + void LoadAcquire(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); + void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); // Generate code to invoke a runtime entry point. 
void InvokeRuntime(int32_t offset, HInstruction* instruction, uint32_t dex_pc); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index e7edd8a805..d377cb57c1 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -215,8 +215,8 @@ class LoadStringSlowPathX86 : public SlowPathCodeX86 { codegen->SaveLiveRegisters(locations); InvokeRuntimeCallingConvention calling_convention; - x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(0)); - __ movl(calling_convention.GetRegisterAt(1), Immediate(instruction_->GetStringIndex())); + x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); + __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction_->GetStringIndex())); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pResolveString))); codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); @@ -2503,13 +2503,13 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { locations->SetOut(Location::RegisterLocation(EAX)); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); + codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(2)); __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); __ fs()->call( @@ -2656,82 +2656,115 @@ void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unreachable"; } -void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { + /* + * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence. + * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model. + * For those cases, all we need to ensure is that there is a scheduling barrier in place. 
+ */ + switch (kind) { + case MemBarrierKind::kAnyAny: { + __ mfence(); + break; + } + case MemBarrierKind::kAnyStore: + case MemBarrierKind::kLoadAny: + case MemBarrierKind::kStoreStore: { + // nop + break; + } + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + } +} + + +void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) { + Label is_null; + __ testl(value, value); + __ j(kEqual, &is_null); + __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value())); + __ movl(temp, object); + __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift)); + __ movb(Address(temp, card, TIMES_1, 0), + X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); + __ Bind(&is_null); +} + +void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - bool is_byte_type = (field_type == Primitive::kPrimBoolean) - || (field_type == Primitive::kPrimByte); - // The register allocator does not support multiple - // inputs that die at entry with one in a specific register. - if (is_byte_type) { - // Ensure the value is in a byte register. - locations->SetInAt(1, Location::RegisterLocation(EAX)); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } - // Temporary registers for the write barrier. - if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); - // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); + if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) { + // Long values can be loaded atomically into an XMM using movsd. + // So we use an XMM register as a temp to achieve atomicity (first load the temp into the XMM + // and then copy the XMM into the output 32bits at a time). 
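The XMM temp reserved above lets the 64-bit load be performed as a single movsd, which is atomic on x86 for naturally aligned addresses, and then split into the output register pair. A standalone sketch of the same trick using SSE2 intrinsics (illustrative only, not ART code; assumes the source is 8-byte aligned):

#include <emmintrin.h>  // SSE2
#include <cstdint>

uint64_t LoadWideAtomic(const uint64_t* addr) {
  // movsd: one 64-bit load into an XMM register.
  __m128i wide = _mm_castpd_si128(_mm_load_sd(reinterpret_cast<const double*>(addr)));
  uint32_t lo = static_cast<uint32_t>(_mm_cvtsi128_si32(wide));                      // movd
  uint32_t hi = static_cast<uint32_t>(_mm_cvtsi128_si32(_mm_srli_epi64(wide, 32)));  // psrlq + movd
  return (static_cast<uint64_t>(hi) << 32) | lo;
}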
+ locations->AddTemp(Location::RequiresFpuRegister()); } } -void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location out = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (field_type) { - case Primitive::kPrimBoolean: + case Primitive::kPrimBoolean: { + __ movzxb(out.AsRegister<Register>(), Address(base, offset)); + break; + } + case Primitive::kPrimByte: { - ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>(); - __ movb(Address(obj, offset), value); + __ movsxb(out.AsRegister<Register>(), Address(base, offset)); + break; + } + + case Primitive::kPrimShort: { + __ movsxw(out.AsRegister<Register>(), Address(base, offset)); break; } - case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movw(Address(obj, offset), value); + __ movzxw(out.AsRegister<Register>(), Address(base, offset)); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movl(Address(obj, offset), value); - - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, obj, value); - } + __ movl(out.AsRegister<Register>(), Address(base, offset)); break; } case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ movl(Address(obj, offset), value.AsRegisterPairLow<Register>()); - __ movl(Address(obj, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); + if (is_volatile) { + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movsd(temp, Address(base, offset)); + __ movd(out.AsRegisterPairLow<Register>(), temp); + __ psrlq(temp, Immediate(32)); + __ movd(out.AsRegisterPairHigh<Register>(), temp); + } else { + __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset)); + __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset)); + } break; } case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(obj, offset), value); + __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(obj, offset), value); + __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } @@ -2739,87 +2772,152 @@ void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instr LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } -} -void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) { - Label is_null; - __ testl(value, value); - __ j(kEqual, &is_null); - __ fs()->movl(card, 
Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value())); - __ movl(temp, object); - __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift)); - __ movb(Address(temp, card, TIMES_1, 0), - X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); - __ Bind(&is_null); + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } -void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + bool is_byte_type = (field_type == Primitive::kPrimBoolean) + || (field_type == Primitive::kPrimByte); + + // The register allocator does not support multiple + // inputs that die at entry with one in a specific register. + if (is_byte_type) { + // Ensure the value is in a byte register. + locations->SetInAt(1, Location::RegisterLocation(EAX)); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + // Temporary registers for the write barrier. + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + locations->AddTemp(Location::RequiresRegister()); + // Ensure the card is in a byte register. + locations->AddTemp(Location::RegisterLocation(ECX)); + } else if (is_volatile && (field_type == Primitive::kPrimLong)) { + // 64bits value can be atomically written to an address with movsd and an XMM register. + // We need two XMM registers because there's no easier way to (bit) copy a register pair + // into a single XMM register (we copy each pair part into the XMMs and then interleave them). + // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the + // isolated cases when we need this it isn't worth adding the extra complexity. 
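A matching sketch of the store path described above: each 32-bit half is moved into its own XMM register (movd), punpckldq interleaves them into one 64-bit lane, and a single movsd writes the pair atomically (again illustrative only, not ART code; assumes an 8-byte aligned destination):

#include <emmintrin.h>  // SSE2
#include <cstdint>

void StoreWideAtomic(uint64_t* addr, uint32_t lo, uint32_t hi) {
  __m128i xmm_lo = _mm_cvtsi32_si128(static_cast<int>(lo));   // movd
  __m128i xmm_hi = _mm_cvtsi32_si128(static_cast<int>(hi));   // movd
  __m128i packed = _mm_unpacklo_epi32(xmm_lo, xmm_hi);        // punpckldq: low 64 bits = hi:lo
  _mm_store_sd(reinterpret_cast<double*>(addr), _mm_castsi128_pd(packed));  // movsd
}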
+ locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + } } -void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); + Register base = locations->InAt(0).AsRegister<Register>(); + Location value = locations->InAt(1); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxb(out, Address(obj, offset)); - break; - } + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } + switch (field_type) { + case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxb(out, Address(obj, offset)); - break; - } - - case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxw(out, Address(obj, offset)); + __ movb(Address(base, offset), value.AsRegister<ByteRegister>()); break; } + case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxw(out, Address(obj, offset)); + __ movw(Address(base, offset), value.AsRegister<Register>()); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ movl(out, Address(obj, offset)); + __ movl(Address(base, offset), value.AsRegister<Register>()); + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>()); + } break; } case Primitive::kPrimLong: { - // TODO: support volatile. 
- __ movl(locations->Out().AsRegisterPairLow<Register>(), Address(obj, offset)); - __ movl(locations->Out().AsRegisterPairHigh<Register>(), Address(obj, kX86WordSize + offset)); + if (is_volatile) { + XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + __ movd(temp1, value.AsRegisterPairLow<Register>()); + __ movd(temp2, value.AsRegisterPairHigh<Register>()); + __ punpckldq(temp1, temp2); + __ movsd(Address(base, offset), temp1); + } else { + __ movl(Address(base, offset), value.AsRegisterPairLow<Register>()); + __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); + } break; } case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(obj, offset)); + __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(obj, offset)); + __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } +} + +void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) { @@ -3383,159 +3481,6 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( // No need for memory fence, thanks to the X86 memory model. 
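The MarkGCCard calls above (and the temp/card registers reserved for them in the locations code) emit the usual card-table write barrier: shift the object address right by the card shift, dirty the corresponding byte of the biased card table, and skip the barrier for null stores. A rough sketch, where the constant values are assumptions rather than ART's definitions in gc/accounting/card_table.h:

    #include <stdint.h>

    constexpr uintptr_t kCardShift = 10;   // assumed: one card byte per 1 KiB of heap
    constexpr uint8_t   kCardDirty = 0x70; // assumed dirty-marker value

    // Dirty the card covering `holder` after a reference was stored into it.
    inline void MarkGCCardSketch(uint8_t* biased_card_table, uintptr_t holder,
                                 uintptr_t stored_value) {
      if (stored_value != 0) {  // the generated code branches over the barrier for null
        biased_card_table[holder >> kCardShift] = kCardDirty;
      }
    }
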
} -void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); - __ movsxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); - __ movzxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register out = locations->Out().AsRegister<Register>(); - __ movl(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimLong: { - // TODO: support volatile. - __ movl(locations->Out().AsRegisterPairLow<Register>(), Address(cls, offset)); - __ movl(locations->Out().AsRegisterPairHigh<Register>(), Address(cls, kX86WordSize + offset)); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); - bool is_byte_type = (field_type == Primitive::kPrimBoolean) - || (field_type == Primitive::kPrimByte); - // The register allocator does not support multiple - // inputs that die at entry with one in a specific register. - if (is_byte_type) { - // Ensure the value is in a byte register. - locations->SetInAt(1, Location::RegisterLocation(EAX)); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } - // Temporary registers for the write barrier. - if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); - // Ensure the card is in a byte register. 
- locations->AddTemp(Location::RegisterLocation(ECX)); - } -} - -void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - Primitive::Type field_type = instruction->GetFieldType(); - - switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { - ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>(); - __ movb(Address(cls, offset), value); - break; - } - - case Primitive::kPrimShort: - case Primitive::kPrimChar: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movw(Address(cls, offset), value); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register value = locations->InAt(1).AsRegister<Register>(); - __ movl(Address(cls, offset), value); - - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, cls, value); - } - break; - } - - case Primitive::kPrimLong: { - Location value = locations->InAt(1); - __ movl(Address(cls, offset), value.AsRegisterPairLow<Register>()); - __ movl(Address(cls, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(cls, offset), value); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(cls, offset), value); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; - UNREACHABLE(); - } -} - void LocationsBuilderX86::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index aed06c04df..636f8845e5 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_ #include "code_generator.h" +#include "dex/compiler_enums.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/x86/assembler_x86.h" @@ -105,6 +106,8 @@ class LocationsBuilderX86 : public HGraphVisitor { void HandleBitwiseOperation(HBinaryOperation* instruction); void HandleInvoke(HInvoke* invoke); void HandleShift(HBinaryOperation* instruction); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); CodeGeneratorX86* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -137,6 +140,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); void GenerateUShrLong(const Location& loc, Register shifter); + void GenerateMemoryBarrier(MemBarrierKind kind); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; diff --git 
a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ff7fcdcbac..5fc24f71e6 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -285,8 +285,8 @@ class LoadStringSlowPathX86_64 : public SlowPathCodeX86_64 { codegen->SaveLiveRegisters(locations); InvokeRuntimeCallingConvention calling_convention; - x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(0))); - __ movl(CpuRegister(calling_convention.GetRegisterAt(1)), + x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); + __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction_->GetStringIndex())); __ gs()->call(Address::Absolute( QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pResolveString), true)); @@ -2311,14 +2311,14 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->SetOut(Location::RegisterLocation(RAX)); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); + codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(2))); __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex())); __ gs()->call(Address::Absolute( @@ -2389,69 +2389,87 @@ void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unimplemented"; } -void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { + /* + * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence. + * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model. + * For those cases, all we need to ensure is that there is a scheduling barrier in place. + */ + switch (kind) { + case MemBarrierKind::kAnyAny: { + __ mfence(); + break; + } + case MemBarrierKind::kAnyStore: + case MemBarrierKind::kLoadAny: + case MemBarrierKind::kStoreStore: { + // nop + break; + } + default: + LOG(FATAL) << "Unexpected memory barier " << kind; + } +} + +void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue()); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - if (needs_write_barrier) { - // Temporary registers for the write barrier. 
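The GenerateMemoryBarrier comment above states the JSR-133 Cookbook rule for x86/x86-64: only a StoreLoad (kAnyAny) barrier needs a real fence; the other kinds are already ordered by the hardware and only have to block compiler reordering. The same mapping expressed with standard atomics, as an illustration outside the ART assembler:

    #include <atomic>

    enum class MemBarrierKind { kAnyStore, kLoadAny, kStoreStore, kAnyAny };

    inline void EmitBarrierX86(MemBarrierKind kind) {
      if (kind == MemBarrierKind::kAnyAny) {
        // StoreLoad: the one ordering x86 does not provide for free -> mfence.
        std::atomic_thread_fence(std::memory_order_seq_cst);
      } else {
        // AnyStore / LoadAny / StoreStore: hardware-ordered already; keep only
        // a compiler scheduling barrier.
        std::atomic_signal_fence(std::memory_order_seq_cst);
      }
    }
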
- locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } -void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { +void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); - Primitive::Type field_type = instruction->GetFieldType(); + CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); + Location out = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (field_type) { - case Primitive::kPrimBoolean: + case Primitive::kPrimBoolean: { + __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset)); + break; + } + case Primitive::kPrimByte: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movb(Address(obj, offset), value); + __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset)); + break; + } + + case Primitive::kPrimShort: { + __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } - case Primitive::kPrimShort: case Primitive::kPrimChar: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movw(Address(obj, offset), value); + __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movl(Address(obj, offset), value); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); - codegen_->MarkGCCard(temp, card, obj, value); - } + __ movl(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } case Primitive::kPrimLong: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movq(Address(obj, offset), value); + __ movq(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(obj, offset), value); + __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(obj, offset), value); + __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } @@ -2459,74 +2477,124 @@ void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* in LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } -void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + bool 
needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(field_info.GetFieldType(), instruction->InputAt(1)); + locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->SetInAt(1, Location::RequiresRegister()); + if (needs_write_barrier) { + // Temporary registers for the write barrier. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } } -void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { +void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); + CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); + Location value = locations->InAt(1); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxb(out, Address(obj, offset)); - break; - } + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } + switch (field_type) { + case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxb(out, Address(obj, offset)); - break; - } - - case Primitive::kPrimShort: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxw(out, Address(obj, offset)); + __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); break; } + case Primitive::kPrimShort: case Primitive::kPrimChar: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxw(out, Address(obj, offset)); + __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movl(out, Address(obj, offset)); + __ movl(Address(base, offset), value.AsRegister<CpuRegister>()); + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); + codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>()); + } break; } case Primitive::kPrimLong: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movq(out, Address(obj, offset)); + __ movq(Address(base, offset), value.AsRegister<CpuRegister>()); break; } case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(obj, offset)); + __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(obj, offset)); + __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } + + if (is_volatile) { + 
GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } +} + +void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) { @@ -3222,146 +3290,6 @@ void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { check->GetLocations()->InAt(0).AsRegister<CpuRegister>()); } -void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - CpuRegister cls = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); - - switch (instruction->GetType()) { - case Primitive::kPrimBoolean: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimByte: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxb(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimShort: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movsxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimChar: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movzxw(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movl(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimLong: { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ movq(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movss(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); - __ movsd(out, Address(cls, offset)); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - 
LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - Primitive::Type field_type = instruction->GetFieldType(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue()); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - CpuRegister cls = locations->InAt(0).AsRegister<CpuRegister>(); - size_t offset = instruction->GetFieldOffset().SizeValue(); - Primitive::Type field_type = instruction->GetFieldType(); - - switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movb(Address(cls, offset), value); - break; - } - - case Primitive::kPrimShort: - case Primitive::kPrimChar: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movw(Address(cls, offset), value); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movl(Address(cls, offset), value); - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); - codegen_->MarkGCCard(temp, card, cls, value); - } - break; - } - - case Primitive::kPrimLong: { - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); - __ movq(Address(cls, offset), value); - break; - } - - case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movss(Address(cls, offset), value); - break; - } - - case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); - __ movsd(Address(cls, offset), value); - break; - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; - UNREACHABLE(); - } -} - void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 794b81ffbc..070886460b 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ #include "code_generator.h" +#include "dex/compiler_enums.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/x86_64/assembler_x86_64.h" @@ -109,6 +110,8 @@ class LocationsBuilderX86_64 : public HGraphVisitor { void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction); CodeGeneratorX86_64* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -138,6 +141,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void 
HandleBitwiseOperation(HBinaryOperation* operation); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleShift(HBinaryOperation* operation); + void GenerateMemoryBarrier(MemBarrierKind kind); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); X86_64Assembler* const assembler_; CodeGeneratorX86_64* const codegen_; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 8b75cc7c65..18722f732b 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -17,6 +17,7 @@ #include <functional> #include "arch/instruction_set.h" +#include "arch/arm/instruction_set_features_arm.h" #include "base/macros.h" #include "builder.h" #include "code_generator_arm.h" @@ -87,7 +88,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { Run(allocator, codegenX86, has_result, expected); } - arm::CodeGeneratorARM codegenARM(graph); + std::unique_ptr<const ArmInstructionSetFeatures> features( + ArmInstructionSetFeatures::FromCppDefines()); + arm::CodeGeneratorARM codegenARM(graph, features.get()); codegenARM.CompileBaseline(&allocator, true); if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { Run(allocator, codegenARM, has_result, expected); @@ -130,7 +133,7 @@ static void RunCodeOptimized(HGraph* graph, bool has_result, Expected expected) { if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { - arm::CodeGeneratorARM codegenARM(graph); + arm::CodeGeneratorARM codegenARM(graph, ArmInstructionSetFeatures::FromCppDefines()); RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kArm64) { arm64::CodeGeneratorARM64 codegenARM64(graph); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index b14b0a70e2..9ed1e4528c 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -142,8 +142,7 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } } - void VisitParallelMove(HParallelMove* instruction) { - output_ << instruction->DebugName(); + void VisitParallelMove(HParallelMove* instruction) OVERRIDE { output_ << " ("; for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) { MoveOperands* move = instruction->MoveOperandsAt(i); @@ -158,8 +157,25 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_ << " (liveness: " << instruction->GetLifetimePosition() << ")"; } - void VisitInstruction(HInstruction* instruction) { + void VisitIntConstant(HIntConstant *instruction) OVERRIDE { + output_ << " " << instruction->GetValue(); + } + + void VisitLongConstant(HLongConstant *instruction) OVERRIDE { + output_ << " " << instruction->GetValue(); + } + + void VisitFloatConstant(HFloatConstant *instruction) OVERRIDE { + output_ << " " << instruction->GetValue(); + } + + void VisitDoubleConstant(HDoubleConstant *instruction) OVERRIDE { + output_ << " " << instruction->GetValue(); + } + + void PrintInstruction(HInstruction* instruction) { output_ << instruction->DebugName(); + instruction->Accept(this); if (instruction->InputCount() > 0) { output_ << " [ "; for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) { @@ -167,15 +183,6 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } output_ << "]"; } - if (instruction->IsIntConstant()) { - output_ << " " << instruction->AsIntConstant()->GetValue(); - } else if 
(instruction->IsLongConstant()) { - output_ << " " << instruction->AsLongConstant()->GetValue(); - } else if (instruction->IsFloatConstant()) { - output_ << " " << instruction->AsFloatConstant()->GetValue(); - } else if (instruction->IsDoubleConstant()) { - output_ << " " << instruction->AsDoubleConstant()->GetValue(); - } if (pass_name_ == kLivenessPassName && instruction->GetLifetimePosition() != kNoLifetime) { output_ << " (liveness: " << instruction->GetLifetimePosition(); if (instruction->HasLiveInterval()) { @@ -210,7 +217,7 @@ class HGraphVisualizerPrinter : public HGraphVisitor { int bci = 0; output_ << bci << " " << instruction->NumberOfUses() << " " << GetTypeId(instruction->GetType()) << instruction->GetId() << " "; - instruction->Accept(this); + PrintInstruction(instruction); output_ << kEndInstructionMarker << std::endl; } } @@ -222,7 +229,7 @@ class HGraphVisualizerPrinter : public HGraphVisitor { EndTag("cfg"); } - void VisitBasicBlock(HBasicBlock* block) { + void VisitBasicBlock(HBasicBlock* block) OVERRIDE { StartTag("block"); PrintProperty("name", "B", block->GetBlockId()); if (block->GetLifetimeStart() != kNoLifetime) { diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index 94ff192264..48f1ea9e15 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -40,18 +40,22 @@ TEST(GVNTest, LocalFieldElimination) { entry->AddSuccessor(block); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(42), false)); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(42), false)); HInstruction* to_remove = block->GetLastInstruction(); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(43))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(43), false)); HInstruction* different_offset = block->GetLastInstruction(); // Kill the value. 
block->AddInstruction(new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42))); + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false)); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, + MemberOffset(42), false)); HInstruction* use_after_kill = block->GetLastInstruction(); block->AddInstruction(new (&allocator) HExit()); @@ -82,7 +86,8 @@ TEST(GVNTest, GlobalFieldElimination) { graph->AddBlock(block); entry->AddSuccessor(block); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); block->AddInstruction(new (&allocator) HIf(block->GetLastInstruction())); HBasicBlock* then = new (&allocator) HBasicBlock(graph); @@ -98,13 +103,16 @@ TEST(GVNTest, GlobalFieldElimination) { else_->AddSuccessor(join); then->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); then->AddInstruction(new (&allocator) HGoto()); else_->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); else_->AddInstruction(new (&allocator) HGoto()); join->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); join->AddInstruction(new (&allocator) HExit()); graph->TryBuildingSsa(); @@ -132,7 +140,8 @@ TEST(GVNTest, LoopFieldElimination) { graph->AddBlock(block); entry->AddSuccessor(block); block->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); block->AddInstruction(new (&allocator) HGoto()); HBasicBlock* loop_header = new (&allocator) HBasicBlock(graph); @@ -148,22 +157,25 @@ TEST(GVNTest, LoopFieldElimination) { loop_body->AddSuccessor(loop_header); loop_header->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); HInstruction* field_get_in_loop_header = loop_header->GetLastInstruction(); loop_header->AddInstruction(new (&allocator) HIf(block->GetLastInstruction())); // Kill inside the loop body to prevent field gets inside the loop header // and the body to be GVN'ed. 
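The "kill" comments in these tests describe how GVN treats field accesses: a field get can be reused as long as no instruction with memory side effects (here the HInstanceFieldSet) has been seen since, and with the new is_volatile flag a volatile get additionally reports CanBeMoved() == false, so it is never deduplicated at all. A minimal, hypothetical sketch of value numbering with that kill behaviour, not ART's implementation:

    #include <map>
    #include <utility>

    struct FieldValueNumbering {
      // (object id, field offset) -> id of the instruction that last loaded it.
      std::map<std::pair<int, int>, int> known_loads;

      int VisitFieldGet(int obj, int offset, int this_id, bool is_volatile) {
        if (is_volatile) return this_id;                  // never reuse volatile loads
        auto it = known_loads.find({obj, offset});
        if (it != known_loads.end()) return it->second;   // redundant load: reuse
        known_loads[{obj, offset}] = this_id;
        return this_id;
      }

      void VisitFieldSet() { known_loads.clear(); }       // conservative kill
    };
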
loop_body->AddInstruction(new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42))); + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false)); HInstruction* field_set = loop_body->GetLastInstruction(); loop_body->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); HInstruction* field_get_in_loop_body = loop_body->GetLastInstruction(); loop_body->AddInstruction(new (&allocator) HGoto()); exit->AddInstruction( - new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, + MemberOffset(42), false)); HInstruction* field_get_in_exit = exit->GetLastInstruction(); exit->AddInstruction(new (&allocator) HExit()); @@ -242,7 +254,7 @@ TEST(GVNTest, LoopSideEffects) { { // Make one block with a side effect. entry->AddInstruction(new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42))); + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false)); GlobalValueNumberer gvn(&allocator, graph); gvn.Run(); @@ -256,7 +268,7 @@ TEST(GVNTest, LoopSideEffects) { { outer_loop_body->InsertInstructionBefore( new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42)), + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false), outer_loop_body->GetLastInstruction()); GlobalValueNumberer gvn(&allocator, graph); @@ -273,7 +285,7 @@ TEST(GVNTest, LoopSideEffects) { outer_loop_body->RemoveInstruction(outer_loop_body->GetFirstInstruction()); inner_loop_body->InsertInstructionBefore( new (&allocator) HInstanceFieldSet( - parameter, parameter, Primitive::kPrimNot, MemberOffset(42)), + parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false), inner_loop_body->GetLastInstruction()); GlobalValueNumberer gvn(&allocator, graph); diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 73eb521ea6..493d93f052 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -27,6 +27,7 @@ #include "mirror/class_loader.h" #include "mirror/dex_cache.h" #include "nodes.h" +#include "register_allocator.h" #include "ssa_phi_elimination.h" #include "scoped_thread_state_change.h" #include "thread.h" @@ -143,6 +144,13 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, return false; } + if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, + compiler_driver_->GetInstructionSet())) { + VLOG(compiler) << "Method " << PrettyMethod(method_index, outer_dex_file) + << " cannot be inlined because of the register allocator"; + return false; + } + if (!callee_graph->TryBuildingSsa()) { VLOG(compiler) << "Method " << PrettyMethod(method_index, outer_dex_file) << " could not be transformed to SSA"; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index c963b70492..0fc1fd8663 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -2128,39 +2128,45 @@ class HNullCheck : public HExpression<1> { class FieldInfo : public ValueObject { public: - FieldInfo(MemberOffset field_offset, Primitive::Type field_type) - : field_offset_(field_offset), field_type_(field_type) {} + FieldInfo(MemberOffset field_offset, Primitive::Type field_type, bool is_volatile) + : field_offset_(field_offset), field_type_(field_type), 
is_volatile_(is_volatile) {} MemberOffset GetFieldOffset() const { return field_offset_; } Primitive::Type GetFieldType() const { return field_type_; } + bool IsVolatile() const { return is_volatile_; } private: const MemberOffset field_offset_; const Primitive::Type field_type_; + const bool is_volatile_; }; class HInstanceFieldGet : public HExpression<1> { public: HInstanceFieldGet(HInstruction* value, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : HExpression(field_type, SideEffects::DependsOnSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, value); } - virtual bool CanBeMoved() const { return true; } - virtual bool InstructionDataEquals(HInstruction* other) const { - size_t other_offset = other->AsInstanceFieldGet()->GetFieldOffset().SizeValue(); - return other_offset == GetFieldOffset().SizeValue(); + bool CanBeMoved() const OVERRIDE { return !IsVolatile(); } + + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + HInstanceFieldGet* other_get = other->AsInstanceFieldGet(); + return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } virtual size_t ComputeHashCode() const { return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } DECLARE_INSTRUCTION(InstanceFieldGet); @@ -2175,15 +2181,18 @@ class HInstanceFieldSet : public HTemplateInstruction<2> { HInstanceFieldSet(HInstruction* object, HInstruction* value, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : HTemplateInstruction(SideEffects::ChangesSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, object); SetRawInputAt(1, value); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } HInstruction* GetValue() const { return InputAt(1); } @@ -2496,24 +2505,29 @@ class HStaticFieldGet : public HExpression<1> { public: HStaticFieldGet(HInstruction* cls, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : HExpression(field_type, SideEffects::DependsOnSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, cls); } - bool CanBeMoved() const OVERRIDE { return true; } + + bool CanBeMoved() const OVERRIDE { return !IsVolatile(); } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { - size_t other_offset = other->AsStaticFieldGet()->GetFieldOffset().SizeValue(); - return other_offset == GetFieldOffset().SizeValue(); + HStaticFieldGet* other_get = other->AsStaticFieldGet(); + return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } size_t ComputeHashCode() const OVERRIDE { return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { 
return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } DECLARE_INSTRUCTION(StaticFieldGet); @@ -2528,15 +2542,18 @@ class HStaticFieldSet : public HTemplateInstruction<2> { HStaticFieldSet(HInstruction* cls, HInstruction* value, Primitive::Type field_type, - MemberOffset field_offset) + MemberOffset field_offset, + bool is_volatile) : HTemplateInstruction(SideEffects::ChangesSomething()), - field_info_(field_offset, field_type) { + field_info_(field_offset, field_type, is_volatile) { SetRawInputAt(0, cls); SetRawInputAt(1, value); } + const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + bool IsVolatile() const { return field_info_.IsVolatile(); } HInstruction* GetValue() const { return InputAt(1); } @@ -2677,7 +2694,7 @@ class HMonitorOperation : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(MonitorOperation); - protected: + private: const OperationKind kind_; const uint32_t dex_pc_; @@ -2685,7 +2702,6 @@ class HMonitorOperation : public HTemplateInstruction<1> { DISALLOW_COPY_AND_ASSIGN(HMonitorOperation); }; - class MoveOperands : public ArenaObject<kArenaAllocMisc> { public: MoveOperands(Location source, Location destination, HInstruction* instruction) diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 94751f876c..1a27724d33 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -225,13 +225,13 @@ static void RunOptimizations(HGraph* graph, // The stack map we generate must be 4-byte aligned on ARM. Since existing // maps are generated alongside these stack maps, we must also align them. 
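For example, a 10-byte mapping table gets two zero bytes appended so that its size becomes RoundUp(10, 4) == 12 before being wrapped in an ArrayRef, which is what AlignVectorSize below does.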
-static std::vector<uint8_t>& AlignVectorSize(std::vector<uint8_t>& vector) { +static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) { size_t size = vector.size(); size_t aligned_size = RoundUp(size, 4); for (; size < aligned_size; ++size) { vector.push_back(0); } - return vector; + return ArrayRef<const uint8_t>(vector); } CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, @@ -289,7 +289,9 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return nullptr; } - CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set); + CompilerDriver* compiler_driver = GetCompilerDriver(); + CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set, + *compiler_driver->GetInstructionSetFeatures()); if (codegen == nullptr) { CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler"; compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen); @@ -315,7 +317,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return nullptr; } RunOptimizations( - graph, GetCompilerDriver(), &compilation_stats_, dex_compilation_unit, visualizer); + graph, compiler_driver, &compilation_stats_, dex_compilation_unit, visualizer); PrepareForRegisterAllocation(graph).Run(); SsaLivenessAnalysis liveness(*graph, codegen); @@ -332,13 +334,14 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, codegen->BuildStackMaps(&stack_map); compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized); - return new CompiledMethod(GetCompilerDriver(), - instruction_set, - allocator.GetMemory(), - codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - 0, /* FPR spill mask, unused */ - stack_map); + return CompiledMethod::SwapAllocCompiledMethodStackMap( + compiler_driver, + instruction_set, + ArrayRef<const uint8_t>(allocator.GetMemory()), + codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + 0, /* FPR spill mask, unused */ + ArrayRef<const uint8_t>(stack_map)); } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) { LOG(FATAL) << "Could not allocate registers in optimizing compiler"; UNREACHABLE(); @@ -356,27 +359,26 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, codegen->CompileBaseline(&allocator); std::vector<uint8_t> mapping_table; - SrcMap src_mapping_table; - codegen->BuildMappingTable(&mapping_table, - GetCompilerDriver()->GetCompilerOptions().GetIncludeDebugSymbols() ? - &src_mapping_table : nullptr); + DefaultSrcMap src_mapping_table; + bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols(); + codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? 
&src_mapping_table : nullptr); std::vector<uint8_t> vmap_table; codegen->BuildVMapTable(&vmap_table); std::vector<uint8_t> gc_map; codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline); - return new CompiledMethod(GetCompilerDriver(), - instruction_set, - allocator.GetMemory(), - codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - 0, /* FPR spill mask, unused */ - &src_mapping_table, - AlignVectorSize(mapping_table), - AlignVectorSize(vmap_table), - AlignVectorSize(gc_map), - nullptr); + return CompiledMethod::SwapAllocCompiledMethod(compiler_driver, + instruction_set, + ArrayRef<const uint8_t>(allocator.GetMemory()), + codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + 0, /* FPR spill mask, unused */ + &src_mapping_table, + AlignVectorSize(mapping_table), + AlignVectorSize(vmap_table), + AlignVectorSize(gc_map), + ArrayRef<const uint8_t>()); } } diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 7993b19850..cc2723df99 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -38,7 +38,6 @@ enum MethodCompilationStat { kNotCompiledUnresolvedMethod, kNotCompiledUnresolvedField, kNotCompiledNonSequentialRegPair, - kNotCompiledVolatile, kNotOptimizedTryCatch, kNotOptimizedDisabled, kNotCompiledCantAccesType, @@ -92,7 +91,6 @@ class OptimizingCompilerStats { case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod"; case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField"; case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair"; - case kNotCompiledVolatile : return "kNotCompiledVolatile"; case kNotOptimizedDisabled : return "kNotOptimizedDisabled"; case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch"; case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index f677e840ef..c2ea80ec33 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -462,7 +462,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, entry->AddSuccessor(block); HInstruction* test = new (allocator) HInstanceFieldGet( - parameter, Primitive::kPrimBoolean, MemberOffset(22)); + parameter, Primitive::kPrimBoolean, MemberOffset(22), false); block->AddInstruction(test); block->AddInstruction(new (allocator) HIf(test)); HBasicBlock* then = new (allocator) HBasicBlock(graph); @@ -481,8 +481,10 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, *phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); join->AddPhi(*phi); - *input1 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42)); - *input2 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42)); + *input1 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, + MemberOffset(42), false); + *input2 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, + MemberOffset(42), false); then->AddInstruction(*input1); else_->AddInstruction(*input2); join->AddInstruction(new (allocator) HExit()); @@ -581,7 +583,8 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator, graph->AddBlock(block); entry->AddSuccessor(block); - *field = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, 
MemberOffset(42)); + *field = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, + MemberOffset(42), false); block->AddInstruction(*field); *ret = new (allocator) HReturn(*field); block->AddInstruction(*ret); diff --git a/compiler/optimizing/test/ConstantFolding.java b/compiler/optimizing/test/ConstantFolding.java index 7fac5a985c..92f2a775b9 100644 --- a/compiler/optimizing/test/ConstantFolding.java +++ b/compiler/optimizing/test/ConstantFolding.java @@ -22,13 +22,13 @@ public class ConstantFolding { */ // CHECK-START: int ConstantFolding.IntNegation() constant_folding (before) - // CHECK: [[Const42:i[0-9]+]] IntConstant 42 - // CHECK: [[Neg:i[0-9]+]] Neg [ [[Const42]] ] - // CHECK: Return [ [[Neg]] ] + // CHECK-DAG: [[Const42:i[0-9]+]] IntConstant 42 + // CHECK-DAG: [[Neg:i[0-9]+]] Neg [ [[Const42]] ] + // CHECK-DAG: Return [ [[Neg]] ] // CHECK-START: int ConstantFolding.IntNegation() constant_folding (after) - // CHECK: [[ConstN42:i[0-9]+]] IntConstant -42 - // CHECK: Return [ [[ConstN42]] ] + // CHECK-DAG: [[ConstN42:i[0-9]+]] IntConstant -42 + // CHECK-DAG: Return [ [[ConstN42]] ] public static int IntNegation() { int x, y; @@ -43,14 +43,14 @@ public class ConstantFolding { */ // CHECK-START: int ConstantFolding.IntAddition1() constant_folding (before) - // CHECK: [[Const1:i[0-9]+]] IntConstant 1 - // CHECK: [[Const2:i[0-9]+]] IntConstant 2 - // CHECK: [[Add:i[0-9]+]] Add [ [[Const1]] [[Const2]] ] - // CHECK: Return [ [[Add]] ] + // CHECK-DAG: [[Const1:i[0-9]+]] IntConstant 1 + // CHECK-DAG: [[Const2:i[0-9]+]] IntConstant 2 + // CHECK-DAG: [[Add:i[0-9]+]] Add [ [[Const1]] [[Const2]] ] + // CHECK-DAG: Return [ [[Add]] ] // CHECK-START: int ConstantFolding.IntAddition1() constant_folding (after) - // CHECK: [[Const3:i[0-9]+]] IntConstant 3 - // CHECK: Return [ [[Const3]] ] + // CHECK-DAG: [[Const3:i[0-9]+]] IntConstant 3 + // CHECK-DAG: Return [ [[Const3]] ] public static int IntAddition1() { int a, b, c; @@ -66,18 +66,18 @@ public class ConstantFolding { */ // CHECK-START: int ConstantFolding.IntAddition2() constant_folding (before) - // CHECK: [[Const1:i[0-9]+]] IntConstant 1 - // CHECK: [[Const2:i[0-9]+]] IntConstant 2 - // CHECK: [[Const5:i[0-9]+]] IntConstant 5 - // CHECK: [[Const6:i[0-9]+]] IntConstant 6 - // CHECK: [[Add1:i[0-9]+]] Add [ [[Const1]] [[Const2]] ] - // CHECK: [[Add2:i[0-9]+]] Add [ [[Const5]] [[Const6]] ] - // CHECK: [[Add3:i[0-9]+]] Add [ [[Add1]] [[Add2]] ] - // CHECK: Return [ [[Add3]] ] + // CHECK-DAG: [[Const1:i[0-9]+]] IntConstant 1 + // CHECK-DAG: [[Const2:i[0-9]+]] IntConstant 2 + // CHECK-DAG: [[Const5:i[0-9]+]] IntConstant 5 + // CHECK-DAG: [[Const6:i[0-9]+]] IntConstant 6 + // CHECK-DAG: [[Add1:i[0-9]+]] Add [ [[Const1]] [[Const2]] ] + // CHECK-DAG: [[Add2:i[0-9]+]] Add [ [[Const5]] [[Const6]] ] + // CHECK-DAG: [[Add3:i[0-9]+]] Add [ [[Add1]] [[Add2]] ] + // CHECK-DAG: Return [ [[Add3]] ] // CHECK-START: int ConstantFolding.IntAddition2() constant_folding (after) - // CHECK: [[Const14:i[0-9]+]] IntConstant 14 - // CHECK: Return [ [[Const14]] ] + // CHECK-DAG: [[Const14:i[0-9]+]] IntConstant 14 + // CHECK-DAG: Return [ [[Const14]] ] public static int IntAddition2() { int a, b, c; @@ -97,14 +97,14 @@ public class ConstantFolding { */ // CHECK-START: int ConstantFolding.IntSubtraction() constant_folding (before) - // CHECK: [[Const5:i[0-9]+]] IntConstant 5 - // CHECK: [[Const2:i[0-9]+]] IntConstant 2 - // CHECK: [[Sub:i[0-9]+]] Sub [ [[Const5]] [[Const2]] ] - // CHECK: Return [ [[Sub]] ] + // CHECK-DAG: [[Const5:i[0-9]+]] IntConstant 5 + // 
CHECK-DAG: [[Const2:i[0-9]+]] IntConstant 2 + // CHECK-DAG: [[Sub:i[0-9]+]] Sub [ [[Const5]] [[Const2]] ] + // CHECK-DAG: Return [ [[Sub]] ] // CHECK-START: int ConstantFolding.IntSubtraction() constant_folding (after) - // CHECK: [[Const3:i[0-9]+]] IntConstant 3 - // CHECK: Return [ [[Const3]] ] + // CHECK-DAG: [[Const3:i[0-9]+]] IntConstant 3 + // CHECK-DAG: Return [ [[Const3]] ] public static int IntSubtraction() { int a, b, c; @@ -120,14 +120,14 @@ public class ConstantFolding { */ // CHECK-START: long ConstantFolding.LongAddition() constant_folding (before) - // CHECK: [[Const1:j[0-9]+]] LongConstant 1 - // CHECK: [[Const2:j[0-9]+]] LongConstant 2 - // CHECK: [[Add:j[0-9]+]] Add [ [[Const1]] [[Const2]] ] - // CHECK: Return [ [[Add]] ] + // CHECK-DAG: [[Const1:j[0-9]+]] LongConstant 1 + // CHECK-DAG: [[Const2:j[0-9]+]] LongConstant 2 + // CHECK-DAG: [[Add:j[0-9]+]] Add [ [[Const1]] [[Const2]] ] + // CHECK-DAG: Return [ [[Add]] ] // CHECK-START: long ConstantFolding.LongAddition() constant_folding (after) - // CHECK: [[Const3:j[0-9]+]] LongConstant 3 - // CHECK: Return [ [[Const3]] ] + // CHECK-DAG: [[Const3:j[0-9]+]] LongConstant 3 + // CHECK-DAG: Return [ [[Const3]] ] public static long LongAddition() { long a, b, c; @@ -143,14 +143,14 @@ public class ConstantFolding { */ // CHECK-START: long ConstantFolding.LongSubtraction() constant_folding (before) - // CHECK: [[Const5:j[0-9]+]] LongConstant 5 - // CHECK: [[Const2:j[0-9]+]] LongConstant 2 - // CHECK: [[Sub:j[0-9]+]] Sub [ [[Const5]] [[Const2]] ] - // CHECK: Return [ [[Sub]] ] + // CHECK-DAG: [[Const5:j[0-9]+]] LongConstant 5 + // CHECK-DAG: [[Const2:j[0-9]+]] LongConstant 2 + // CHECK-DAG: [[Sub:j[0-9]+]] Sub [ [[Const5]] [[Const2]] ] + // CHECK-DAG: Return [ [[Sub]] ] // CHECK-START: long ConstantFolding.LongSubtraction() constant_folding (after) - // CHECK: [[Const3:j[0-9]+]] LongConstant 3 - // CHECK: Return [ [[Const3]] ] + // CHECK-DAG: [[Const3:j[0-9]+]] LongConstant 3 + // CHECK-DAG: Return [ [[Const3]] ] public static long LongSubtraction() { long a, b, c; @@ -165,14 +165,14 @@ public class ConstantFolding { */ // CHECK-START: int ConstantFolding.StaticCondition() constant_folding (before) - // CHECK: [[Const5:i[0-9]+]] IntConstant 5 - // CHECK: [[Const2:i[0-9]+]] IntConstant 2 - // CHECK: [[Cond:z[0-9]+]] GreaterThanOrEqual [ [[Const5]] [[Const2]] ] - // CHECK: If [ [[Cond]] ] + // CHECK-DAG: [[Const5:i[0-9]+]] IntConstant 5 + // CHECK-DAG: [[Const2:i[0-9]+]] IntConstant 2 + // CHECK-DAG: [[Cond:z[0-9]+]] GreaterThanOrEqual [ [[Const5]] [[Const2]] ] + // CHECK-DAG: If [ [[Cond]] ] // CHECK-START: int ConstantFolding.StaticCondition() constant_folding (after) - // CHECK: [[Const1:i[0-9]+]] IntConstant 1 - // CHECK: If [ [[Const1]] ] + // CHECK-DAG: [[Const1:i[0-9]+]] IntConstant 1 + // CHECK-DAG: If [ [[Const1]] ] public static int StaticCondition() { int a, b, c; @@ -195,18 +195,18 @@ public class ConstantFolding { */ // CHECK-START: int ConstantFolding.JumpsAndConditionals(boolean) constant_folding (before) - // CHECK: [[Const5:i[0-9]+]] IntConstant 5 - // CHECK: [[Const2:i[0-9]+]] IntConstant 2 - // CHECK: [[Add:i[0-9]+]] Add [ [[Const5]] [[Const2]] ] - // CHECK: [[Phi:i[0-9]+]] Phi [ [[Add]] [[Sub:i[0-9]+]] ] - // CHECK: Return [ [[Phi]] ] - // CHECK: [[Sub]] Sub [ [[Const5]] [[Const2]] ] + // CHECK-DAG: [[Const2:i[0-9]+]] IntConstant 2 + // CHECK-DAG: [[Const5:i[0-9]+]] IntConstant 5 + // CHECK-DAG: [[Add:i[0-9]+]] Add [ [[Const5]] [[Const2]] ] + // CHECK-DAG: [[Sub:i[0-9]+]] Sub [ [[Const5]] [[Const2]] ] + // 
CHECK-DAG: [[Phi:i[0-9]+]] Phi [ [[Add]] [[Sub]] ] + // CHECK-DAG: Return [ [[Phi]] ] // CHECK-START: int ConstantFolding.JumpsAndConditionals(boolean) constant_folding (after) - // CHECK: [[Const7:i[0-9]+]] IntConstant 7 - // CHECK: [[Phi:i[0-9]+]] Phi [ [[Const7]] [[Const3:i[0-9]+]] ] - // CHECK: Return [ [[Phi]] ] - // CHECK: [[Const3]] IntConstant 3 + // CHECK-DAG: [[Const3:i[0-9]+]] IntConstant 3 + // CHECK-DAG: [[Const7:i[0-9]+]] IntConstant 7 + // CHECK-DAG: [[Phi:i[0-9]+]] Phi [ [[Const7]] [[Const3]] ] + // CHECK-DAG: Return [ [[Phi]] ] public static int JumpsAndConditionals(boolean cond) { int a, b, c; diff --git a/compiler/optimizing/test/Inliner.java b/compiler/optimizing/test/Inliner.java new file mode 100644 index 0000000000..ce7409c958 --- /dev/null +++ b/compiler/optimizing/test/Inliner.java @@ -0,0 +1,202 @@ +/* +* Copyright (C) 2014 The Android Open Source Project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +public class Inliner { + + // CHECK-START: void Inliner.InlineVoid() inliner (before) + // CHECK-DAG: [[Const42:i[0-9]+]] IntConstant 42 + // CHECK-DAG: InvokeStaticOrDirect + // CHECK-DAG: InvokeStaticOrDirect [ [[Const42]] ] + + // CHECK-START: void Inliner.InlineVoid() inliner (after) + // CHECK-NOT: InvokeStaticOrDirect + + public static void InlineVoid() { + returnVoid(); + returnVoidWithOneParameter(42); + } + + // CHECK-START: int Inliner.InlineParameter(int) inliner (before) + // CHECK-DAG: [[Param:i[0-9]+]] ParameterValue + // CHECK-DAG: [[Result:i[0-9]+]] InvokeStaticOrDirect [ [[Param]] ] + // CHECK-DAG: Return [ [[Result]] ] + + // CHECK-START: int Inliner.InlineParameter(int) inliner (after) + // CHECK-DAG: [[Param:i[0-9]+]] ParameterValue + // CHECK-DAG: Return [ [[Param]] ] + + public static int InlineParameter(int a) { + return returnParameter(a); + } + + // CHECK-START: long Inliner.InlineWideParameter(long) inliner (before) + // CHECK-DAG: [[Param:j[0-9]+]] ParameterValue + // CHECK-DAG: [[Result:j[0-9]+]] InvokeStaticOrDirect [ [[Param]] ] + // CHECK-DAG: Return [ [[Result]] ] + + // CHECK-START: long Inliner.InlineWideParameter(long) inliner (after) + // CHECK-DAG: [[Param:j[0-9]+]] ParameterValue + // CHECK-DAG: Return [ [[Param]] ] + + public static long InlineWideParameter(long a) { + return returnWideParameter(a); + } + + // CHECK-START: java.lang.Object Inliner.InlineReferenceParameter(java.lang.Object) inliner (before) + // CHECK-DAG: [[Param:l[0-9]+]] ParameterValue + // CHECK-DAG: [[Result:l[0-9]+]] InvokeStaticOrDirect [ [[Param]] ] + // CHECK-DAG: Return [ [[Result]] ] + + // CHECK-START: java.lang.Object Inliner.InlineReferenceParameter(java.lang.Object) inliner (after) + // CHECK-DAG: [[Param:l[0-9]+]] ParameterValue + // CHECK-DAG: Return [ [[Param]] ] + + public static Object InlineReferenceParameter(Object o) { + return returnReferenceParameter(o); + } + + // CHECK-START: int Inliner.InlineInt() inliner (before) + // CHECK-DAG: [[Result:i[0-9]+]] InvokeStaticOrDirect + // CHECK-DAG: Return [ [[Result]] ] + + // CHECK-START: int 
Inliner.InlineInt() inliner (after) + // CHECK-DAG: [[Const4:i[0-9]+]] IntConstant 4 + // CHECK-DAG: Return [ [[Const4]] ] + + public static int InlineInt() { + return returnInt(); + } + + // CHECK-START: long Inliner.InlineWide() inliner (before) + // CHECK-DAG: [[Result:j[0-9]+]] InvokeStaticOrDirect + // CHECK-DAG: Return [ [[Result]] ] + + // CHECK-START: long Inliner.InlineWide() inliner (after) + // CHECK-DAG: [[Const8:j[0-9]+]] LongConstant 8 + // CHECK-DAG: Return [ [[Const8]] ] + + public static long InlineWide() { + return returnWide(); + } + + // CHECK-START: int Inliner.InlineAdd() inliner (before) + // CHECK-DAG: [[Const3:i[0-9]+]] IntConstant 3 + // CHECK-DAG: [[Const5:i[0-9]+]] IntConstant 5 + // CHECK-DAG: [[Result:i[0-9]+]] InvokeStaticOrDirect + // CHECK-DAG: Return [ [[Result]] ] + + // CHECK-START: int Inliner.InlineAdd() inliner (after) + // CHECK-DAG: [[Const3:i[0-9]+]] IntConstant 3 + // CHECK-DAG: [[Const5:i[0-9]+]] IntConstant 5 + // CHECK-DAG: [[Add:i[0-9]+]] Add [ [[Const3]] [[Const5]] ] + // CHECK-DAG: Return [ [[Add]] ] + + public static int InlineAdd() { + return returnAdd(3, 5); + } + + // CHECK-START: int Inliner.InlineFieldAccess() inliner (before) + // CHECK-DAG: [[After:i[0-9]+]] InvokeStaticOrDirect + // CHECK-DAG: Return [ [[After]] ] + + // CHECK-START: int Inliner.InlineFieldAccess() inliner (after) + // CHECK-DAG: [[Const1:i[0-9]+]] IntConstant 1 + // CHECK-DAG: [[Before:i[0-9]+]] StaticFieldGet + // CHECK-DAG: [[After:i[0-9]+]] Add [ [[Before]] [[Const1]] ] + // CHECK-DAG: StaticFieldSet [ {{l[0-9]+}} [[After]] ] + // CHECK-DAG: Return [ [[After]] ] + + // CHECK-START: int Inliner.InlineFieldAccess() inliner (after) + // CHECK-NOT: InvokeStaticOrDirect + + public static int InlineFieldAccess() { + return incCounter(); + } + + // CHECK-START: int Inliner.InlineWithControlFlow(boolean) inliner (before) + // CHECK-DAG: [[Const1:i[0-9]+]] IntConstant 1 + // CHECK-DAG: [[Const3:i[0-9]+]] IntConstant 3 + // CHECK-DAG: [[Const5:i[0-9]+]] IntConstant 5 + // CHECK-DAG: [[Add:i[0-9]+]] InvokeStaticOrDirect [ [[Const1]] [[Const3]] ] + // CHECK-DAG: [[Sub:i[0-9]+]] InvokeStaticOrDirect [ [[Const5]] [[Const3]] ] + // CHECK-DAG: [[Phi:i[0-9]+]] Phi [ [[Add]] [[Sub]] ] + // CHECK-DAG: Return [ [[Phi]] ] + + // CHECK-START: int Inliner.InlineWithControlFlow(boolean) inliner (after) + // CHECK-DAG: [[Const1:i[0-9]+]] IntConstant 1 + // CHECK-DAG: [[Const3:i[0-9]+]] IntConstant 3 + // CHECK-DAG: [[Const5:i[0-9]+]] IntConstant 5 + // CHECK-DAG: [[Add:i[0-9]+]] Add [ [[Const1]] [[Const3]] ] + // CHECK-DAG: [[Sub:i[0-9]+]] Sub [ [[Const5]] [[Const3]] ] + // CHECK-DAG: [[Phi:i[0-9]+]] Phi [ [[Add]] [[Sub]] ] + // CHECK-DAG: Return [ [[Phi]] ] + + public static int InlineWithControlFlow(boolean cond) { + int x, const1, const3, const5; + const1 = 1; + const3 = 3; + const5 = 5; + if (cond) { + x = returnAdd(const1, const3); + } else { + x = returnSub(const5, const3); + } + return x; + } + + + private static void returnVoid() { + return; + } + + private static void returnVoidWithOneParameter(int a) { + return; + } + + private static int returnParameter(int a) { + return a; + } + + private static long returnWideParameter(long a) { + return a; + } + + private static Object returnReferenceParameter(Object o) { + return o; + } + + private static int returnInt() { + return 4; + } + + private static long returnWide() { + return 8L; + } + + private static int returnAdd(int a, int b) { + return a + b; + } + + private static int returnSub(int a, int b) { + return a - b; + } + + 
private static int counter = 42; + + private static int incCounter() { + return ++counter; + } +} diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index c86ec4b3d6..87b38133fb 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -429,6 +429,8 @@ class ArmAssembler : public Assembler { virtual void ldrex(Register rd, Register rn, Condition cond = AL) = 0; virtual void strex(Register rd, Register rt, Register rn, Condition cond = AL) = 0; + virtual void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) = 0; + virtual void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) = 0; // Miscellaneous instructions. virtual void clrex(Condition cond = AL) = 0; diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index 8f6d45ab53..8d1fb60725 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -778,6 +778,7 @@ void Arm32Assembler::EmitMulOp(Condition cond, int32_t opcode, Emit(encoding); } + void Arm32Assembler::ldrex(Register rt, Register rn, Condition cond) { CHECK_NE(rn, kNoRegister); CHECK_NE(rt, kNoRegister); @@ -793,6 +794,25 @@ void Arm32Assembler::ldrex(Register rt, Register rn, Condition cond) { } +void Arm32Assembler::ldrexd(Register rt, Register rt2, Register rn, Condition cond) { + CHECK_NE(rn, kNoRegister); + CHECK_NE(rt, kNoRegister); + CHECK_NE(rt2, kNoRegister); + CHECK_NE(rt, R14); + CHECK_EQ(0u, static_cast<uint32_t>(rt) % 2); + CHECK_EQ(static_cast<uint32_t>(rt) + 1, static_cast<uint32_t>(rt2)); + CHECK_NE(cond, kNoCondition); + + int32_t encoding = + (static_cast<uint32_t>(cond) << kConditionShift) | + B24 | B23 | B21 | B20 | + static_cast<uint32_t>(rn) << 16 | + static_cast<uint32_t>(rt) << 12 | + B11 | B10 | B9 | B8 | B7 | B4 | B3 | B2 | B1 | B0; + Emit(encoding); +} + + void Arm32Assembler::strex(Register rd, Register rt, Register rn, @@ -811,6 +831,28 @@ void Arm32Assembler::strex(Register rd, Emit(encoding); } +void Arm32Assembler::strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond) { + CHECK_NE(rd, kNoRegister); + CHECK_NE(rn, kNoRegister); + CHECK_NE(rt, kNoRegister); + CHECK_NE(rt2, kNoRegister); + CHECK_NE(rt, R14); + CHECK_NE(rd, rt); + CHECK_NE(rd, rt2); + CHECK_EQ(0u, static_cast<uint32_t>(rt) % 2); + CHECK_EQ(static_cast<uint32_t>(rt) + 1, static_cast<uint32_t>(rt2)); + CHECK_NE(cond, kNoCondition); + + int32_t encoding = + (static_cast<uint32_t>(cond) << kConditionShift) | + B24 | B23 | B21 | + static_cast<uint32_t>(rn) << 16 | + static_cast<uint32_t>(rd) << 12 | + B11 | B10 | B9 | B8 | B7 | B4 | + static_cast<uint32_t>(rt); + Emit(encoding); +} + void Arm32Assembler::clrex(Condition cond) { CHECK_EQ(cond, AL); // This cannot be conditional on ARM. diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 6c8d41587b..b922d66513 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -123,6 +123,8 @@ class Arm32Assembler FINAL : public ArmAssembler { void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE; void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE; + void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; + void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; // Miscellaneous instructions. 
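Note: a minimal usage sketch of the new A32 exclusive-doubleword helpers, assuming an ArmAssembler* named "assembler" as in the tests that follow. The A32 encodings only accept an even/odd register pair, which the CHECKs above mirror; the Thumb2 versions further down merely require rt != rt2.

    assembler->ldrexd(arm::R0, arm::R1, arm::R2);            // rt = r0 (even), rt2 = r1 = rt + 1, base r2
    assembler->strexd(arm::R9, arm::R2, arm::R3, arm::R4);   // status in r9, pair r2/r3, base r4
    // assembler->ldrexd(arm::R1, arm::R2, arm::R0);         // rejected: rt must be even (CHECK_EQ(rt % 2, 0))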
void clrex(Condition cond = AL) OVERRIDE; diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc index 951792d45b..4a0ae0ba99 100644 --- a/compiler/utils/arm/assembler_arm32_test.cc +++ b/compiler/utils/arm/assembler_arm32_test.cc @@ -697,4 +697,28 @@ TEST_F(AssemblerArm32Test, Vmstat) { DriverStr(expected, "vmrs"); } +TEST_F(AssemblerArm32Test, ldrexd) { + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R0); + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R1); + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R2); + + const char* expected = + "ldrexd r0, r1, [r0]\n" + "ldrexd r0, r1, [r1]\n" + "ldrexd r0, r1, [r2]\n"; + DriverStr(expected, "ldrexd"); +} + +TEST_F(AssemblerArm32Test, strexd) { + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R0); + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R1); + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R2); + + const char* expected = + "strexd r9, r0, r1, [r0]\n" + "strexd r9, r0, r1, [r1]\n" + "strexd r9, r0, r1, [r2]\n"; + DriverStr(expected, "strexd"); +} + } // namespace art diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 479186c5d7..3eccd3f9df 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -698,48 +698,37 @@ bool Thumb2Assembler::Is32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED, return true; } - bool can_contain_high_register = (opcode == MOV) - || ((opcode == ADD || opcode == SUB) && (rn == rd)); - - if (IsHighRegister(rd) || IsHighRegister(rn)) { - if (can_contain_high_register) { - // There are high register instructions available for this opcode. - // However, there is no RRX available. - if (so.IsShift() && so.GetShift() == RRX) { - return true; + // Check special case for SP relative ADD and SUB immediate. + if ((opcode == ADD || opcode == SUB) && rn == SP && so.IsImmediate()) { + // If the immediate is in range, use 16 bit. + if (rd == SP) { + if (so.GetImmediate() < (1 << 9)) { // 9 bit immediate. + return false; } + } else if (!IsHighRegister(rd) && opcode == ADD) { + if (so.GetImmediate() < (1 << 10)) { // 10 bit immediate. + return false; + } + } + } - // Check special case for SP relative ADD and SUB immediate. - if ((opcode == ADD || opcode == SUB) && so.IsImmediate()) { - // If rn is SP and rd is a high register we need to use a 32 bit encoding. - if (rn == SP && rd != SP && IsHighRegister(rd)) { - return true; - } + bool can_contain_high_register = (opcode == MOV) + || ((opcode == ADD) && (rn == rd)); - uint32_t imm = so.GetImmediate(); - // If the immediates are out of range use 32 bit. - if (rd == SP && rn == SP) { - if (imm > (1 << 9)) { // 9 bit immediate. - return true; - } - } else if (opcode == ADD && rd != SP && rn == SP) { // 10 bit immediate. - if (imm > (1 << 10)) { - return true; - } - } else if (opcode == SUB && rd != SP && rn == SP) { - // SUB rd, SP, #imm is always 32 bit. - return true; - } - } + if (IsHighRegister(rd) || IsHighRegister(rn)) { + if (!can_contain_high_register) { + return true; } - // The ADD,SUB and MOV instructions that work with high registers don't have - // immediate variants. - if (so.IsImmediate()) { + // There are high register instructions available for this opcode. + // However, there is no actual shift available, neither for ADD nor for MOV (ASR/LSR/LSL/ROR). 
+ if (so.IsShift() && (so.GetShift() == RRX || so.GetImmediate() != 0u)) { return true; } - if (!can_contain_high_register) { + // The ADD and MOV instructions that work with high registers don't have 16-bit + // immediate variants. + if (so.IsImmediate()) { return true; } } @@ -1030,7 +1019,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond ATTRIBUTE_UNUSED, uint8_t rn_shift = 3; uint8_t immediate_shift = 0; bool use_immediate = false; - uint8_t immediate = 0; + uint32_t immediate = 0; // Should be at most 9 bits but keep the full immediate for CHECKs. uint8_t thumb_opcode;; if (so.IsImmediate()) { @@ -1066,8 +1055,8 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond ATTRIBUTE_UNUSED, dp_opcode = 2U /* 0b10 */; thumb_opcode = 3U /* 0b11 */; opcode_shift = 12; - CHECK_LT(immediate, (1 << 9)); - CHECK_EQ((immediate & 3 /* 0b11 */), 0); + CHECK_LT(immediate, (1u << 9)); + CHECK_EQ((immediate & 3u /* 0b11 */), 0u); // Remove rd and rn from instruction by orring it with immed and clearing bits. rn = R0; @@ -1080,8 +1069,8 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond ATTRIBUTE_UNUSED, dp_opcode = 2U /* 0b10 */; thumb_opcode = 5U /* 0b101 */; opcode_shift = 11; - CHECK_LT(immediate, (1 << 10)); - CHECK_EQ((immediate & 3 /* 0b11 */), 0); + CHECK_LT(immediate, (1u << 10)); + CHECK_EQ((immediate & 3u /* 0b11 */), 0u); // Remove rn from instruction. rn = R0; @@ -1117,8 +1106,8 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond ATTRIBUTE_UNUSED, dp_opcode = 2U /* 0b10 */; thumb_opcode = 0x61 /* 0b1100001 */; opcode_shift = 7; - CHECK_LT(immediate, (1 << 9)); - CHECK_EQ((immediate & 3 /* 0b11 */), 0); + CHECK_LT(immediate, (1u << 9)); + CHECK_EQ((immediate & 3u /* 0b11 */), 0u); // Remove rd and rn from instruction by orring it with immed and clearing bits. 
rn = R0; @@ -1673,9 +1662,6 @@ void Thumb2Assembler::ldrex(Register rt, Register rn, uint16_t imm, Condition co CHECK_NE(rn, kNoRegister); CHECK_NE(rt, kNoRegister); CheckCondition(cond); - CHECK_NE(rn, kNoRegister); - CHECK_NE(rt, kNoRegister); - CheckCondition(cond); CHECK_LT(imm, (1u << 10)); int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 | @@ -1712,6 +1698,22 @@ void Thumb2Assembler::strex(Register rd, } +void Thumb2Assembler::ldrexd(Register rt, Register rt2, Register rn, Condition cond) { + CHECK_NE(rn, kNoRegister); + CHECK_NE(rt, kNoRegister); + CHECK_NE(rt2, kNoRegister); + CHECK_NE(rt, rt2); + CheckCondition(cond); + + int32_t encoding = B31 | B30 | B29 | B27 | B23 | B22 | B20 | + static_cast<uint32_t>(rn) << 16 | + static_cast<uint32_t>(rt) << 12 | + static_cast<uint32_t>(rt2) << 8 | + B6 | B5 | B4 | B3 | B2 | B1 | B0; + Emit32(encoding); +} + + void Thumb2Assembler::strex(Register rd, Register rt, Register rn, @@ -1720,6 +1722,26 @@ void Thumb2Assembler::strex(Register rd, } +void Thumb2Assembler::strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond) { + CHECK_NE(rd, kNoRegister); + CHECK_NE(rn, kNoRegister); + CHECK_NE(rt, kNoRegister); + CHECK_NE(rt2, kNoRegister); + CHECK_NE(rt, rt2); + CHECK_NE(rd, rt); + CHECK_NE(rd, rt2); + CheckCondition(cond); + + int32_t encoding = B31 | B30 | B29 | B27 | B23 | B22 | + static_cast<uint32_t>(rn) << 16 | + static_cast<uint32_t>(rt) << 12 | + static_cast<uint32_t>(rt2) << 8 | + B6 | B5 | B4 | + static_cast<uint32_t>(rd); + Emit32(encoding); +} + + void Thumb2Assembler::clrex(Condition cond) { CheckCondition(cond); int32_t encoding = B31 | B30 | B29 | B27 | B28 | B25 | B24 | B23 | diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 48a3a7eeb2..81dd13894f 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -149,6 +149,8 @@ class Thumb2Assembler FINAL : public ArmAssembler { void ldrex(Register rd, Register rn, uint16_t imm, Condition cond = AL); void strex(Register rd, Register rt, Register rn, uint16_t imm, Condition cond = AL); + void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; + void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; // Miscellaneous instructions. 
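Note on the Is32BitDataProcessing/Emit16BitAddSub rework above: SP-relative ADD/SUB with a small, word-aligned immediate and a low destination register now selects the 16-bit encodings, while high-register or out-of-range forms stay 32-bit. A sketch of the effect, assuming a Thumb2Assembler* named "assembler" and the ShifterOperand immediate constructor; the encodings match the updated SpecialAddSubResults expectations further below.

    assembler->add(arm::R2, arm::SP, arm::ShifterOperand(80));    // 16-bit: add r2, sp, #80
    assembler->add(arm::SP, arm::SP, arm::ShifterOperand(80));    // 16-bit: add sp, #80
    assembler->add(arm::R8, arm::SP, arm::ShifterOperand(80));    // 32-bit: addw r8, sp, #80 (high rd)
    assembler->sub(arm::SP, arm::SP, arm::ShifterOperand(80));    // 16-bit: sub sp, #80
    assembler->sub(arm::R0, arm::SP, arm::ShifterOperand(80));    // 32-bit: subw r0, sp, #80 (no 16-bit SUB rd, SP, #imm)
    assembler->add(arm::SP, arm::SP, arm::ShifterOperand(3840));  // 32-bit: addw sp, sp, #3840 (immediate out of range)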
void clrex(Condition cond = AL) OVERRIDE; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 6ae95a40e6..425ccd7ea3 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -164,4 +164,32 @@ TEST_F(AssemblerThumb2Test, Vmstat) { DriverStr(expected, "vmrs"); } +TEST_F(AssemblerThumb2Test, ldrexd) { + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R0); + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R1); + GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R2); + GetAssembler()->ldrexd(arm::R5, arm::R3, arm::R7); + + const char* expected = + "ldrexd r0, r1, [r0]\n" + "ldrexd r0, r1, [r1]\n" + "ldrexd r0, r1, [r2]\n" + "ldrexd r5, r3, [r7]\n"; + DriverStr(expected, "ldrexd"); +} + +TEST_F(AssemblerThumb2Test, strexd) { + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R0); + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R1); + GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R2); + GetAssembler()->strexd(arm::R9, arm::R5, arm::R3, arm::R7); + + const char* expected = + "strexd r9, r0, r1, [r0]\n" + "strexd r9, r0, r1, [r1]\n" + "strexd r9, r0, r1, [r2]\n" + "strexd r9, r5, r3, [r7]\n"; + DriverStr(expected, "strexd"); +} + } // namespace art diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h index 1a7f2e8c02..b1b0ee5e53 100644 --- a/compiler/utils/array_ref.h +++ b/compiler/utils/array_ref.h @@ -84,7 +84,7 @@ class ArrayRef { template <typename U, typename Alloc> ArrayRef(const std::vector<U, Alloc>& v, - typename std::enable_if<std::is_same<T, const U>::value, tag>::tag + typename std::enable_if<std::is_same<T, const U>::value, tag>::type t ATTRIBUTE_UNUSED = tag()) : array_(v.data()), size_(v.size()) { } diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 67711e312c..134dda4b2c 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -502,6 +502,8 @@ class Assembler { virtual void InitializeFrameDescriptionEntry() {} virtual void FinalizeFrameDescriptionEntry() {} + // Give a vector containing FDE data, or null if not used. Note: the assembler must take care + // of handling the lifecycle. 
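Note on the array_ref.h hunk above: std::enable_if has no member named "tag", so the old ::tag spelling made the vector constructor unusable; the dependent member is ::type. A standalone sketch of the corrected idiom (hypothetical Ref type, not ART code):

    #include <cstddef>
    #include <type_traits>
    #include <vector>

    // The constructor participates in overload resolution only when T is const U,
    // mirroring the corrected ArrayRef constraint.
    template <typename T>
    class Ref {
     public:
      template <typename U,
                typename = typename std::enable_if<std::is_same<T, const U>::value>::type>
      explicit Ref(const std::vector<U>& v) : data_(v.data()), size_(v.size()) {}

     private:
      const T* data_;
      size_t size_;
    };

    // Ref<const int> ok(std::vector<int>{1, 2, 3});   // enabled: T == const U
    // Ref<const long> bad(std::vector<int>{});        // constructor SFINAE'd away -> compile error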
virtual std::vector<uint8_t>* GetFrameDescriptionEntry() { return nullptr; } virtual ~Assembler() {} diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 3f2641c76f..3d03234e04 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -102,11 +102,11 @@ const char* DataProcessingShiftedRegisterResults[] = { " 4: 11a3 asrs r3, r4, #6\n", " 6: ea4f 13f4 mov.w r3, r4, ror #7\n", " a: 41e3 rors r3, r4\n", - " c: 0128 lsls r0, r5, #4\n", - " e: 0968 lsrs r0, r5, #5\n", - " 10: 11a8 asrs r0, r5, #6\n", - " 12: ea4f 18f4 mov.w r8, r4, ror #7\n", - " 16: ea4f 0834 mov.w r8, r4, rrx\n", + " c: ea4f 1804 mov.w r8, r4, lsl #4\n", + " 10: ea4f 1854 mov.w r8, r4, lsr #5\n", + " 14: ea4f 18a4 mov.w r8, r4, asr #6\n", + " 18: ea4f 18f4 mov.w r8, r4, ror #7\n", + " 1c: ea4f 0834 mov.w r8, r4, rrx\n", nullptr }; const char* BasicLoadResults[] = { @@ -340,15 +340,15 @@ const char* MovWMovTResults[] = { nullptr }; const char* SpecialAddSubResults[] = { - " 0: f20d 0250 addw r2, sp, #80 ; 0x50\n", - " 4: f20d 0d50 addw sp, sp, #80 ; 0x50\n", - " 8: f20d 0850 addw r8, sp, #80 ; 0x50\n", - " c: f60d 7200 addw r2, sp, #3840 ; 0xf00\n", - " 10: f60d 7d00 addw sp, sp, #3840 ; 0xf00\n", - " 14: f2ad 0d50 subw sp, sp, #80 ; 0x50\n", - " 18: f2ad 0050 subw r0, sp, #80 ; 0x50\n", - " 1c: f2ad 0850 subw r8, sp, #80 ; 0x50\n", - " 20: f6ad 7d00 subw sp, sp, #3840 ; 0xf00\n", + " 0: aa14 add r2, sp, #80 ; 0x50\n", + " 2: b014 add sp, #80 ; 0x50\n", + " 4: f20d 0850 addw r8, sp, #80 ; 0x50\n", + " 8: f60d 7200 addw r2, sp, #3840 ; 0xf00\n", + " c: f60d 7d00 addw sp, sp, #3840 ; 0xf00\n", + " 10: b094 sub sp, #80 ; 0x50\n", + " 12: f2ad 0050 subw r0, sp, #80 ; 0x50\n", + " 16: f2ad 0850 subw r8, sp, #80 ; 0x50\n", + " 1a: f6ad 7d00 subw sp, sp, #3840 ; 0xf00\n", nullptr }; const char* StoreToOffsetResults[] = { diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h index 4c52174936..b062a2aa86 100644 --- a/compiler/utils/dedupe_set.h +++ b/compiler/utils/dedupe_set.h @@ -17,50 +17,89 @@ #ifndef ART_COMPILER_UTILS_DEDUPE_SET_H_ #define ART_COMPILER_UTILS_DEDUPE_SET_H_ +#include <algorithm> +#include <inttypes.h> +#include <memory> #include <set> #include <string> #include "base/mutex.h" #include "base/stl_util.h" #include "base/stringprintf.h" +#include "utils/swap_space.h" namespace art { // A set of Keys that support a HashFunc returning HashType. Used to find duplicates of Key in the // Add method. The data-structure is thread-safe through the use of internal locks, it also // supports the lock being sharded. -template <typename Key, typename HashType, typename HashFunc, HashType kShard = 1> +template <typename InKey, typename StoreKey, typename HashType, typename HashFunc, + HashType kShard = 1> class DedupeSet { - typedef std::pair<HashType, Key*> HashedKey; + typedef std::pair<HashType, const InKey*> HashedInKey; + struct HashedKey { + StoreKey* store_ptr; + union { + HashType store_hash; // Valid if store_ptr != nullptr. + const HashedInKey* in_key; // Valid if store_ptr == nullptr. + }; + }; class Comparator { public: bool operator()(const HashedKey& a, const HashedKey& b) const { - if (a.first != b.first) { - return a.first < b.first; + HashType a_hash = (a.store_ptr != nullptr) ? a.store_hash : a.in_key->first; + HashType b_hash = (b.store_ptr != nullptr) ? 
b.store_hash : b.in_key->first; + if (a_hash != b_hash) { + return a_hash < b_hash; + } + if (a.store_ptr != nullptr && b.store_ptr != nullptr) { + return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(), + b.store_ptr->begin(), b.store_ptr->end()); + } else if (a.store_ptr != nullptr && b.store_ptr == nullptr) { + return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(), + b.in_key->second->begin(), b.in_key->second->end()); + } else if (a.store_ptr == nullptr && b.store_ptr != nullptr) { + return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(), + b.store_ptr->begin(), b.store_ptr->end()); } else { - return *a.second < *b.second; + return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(), + b.in_key->second->begin(), b.in_key->second->end()); } } }; public: - Key* Add(Thread* self, const Key& key) { + StoreKey* Add(Thread* self, const InKey& key) { + uint64_t hash_start; + if (kIsDebugBuild) { + hash_start = NanoTime(); + } HashType raw_hash = HashFunc()(key); + if (kIsDebugBuild) { + uint64_t hash_end = NanoTime(); + hash_time_ += hash_end - hash_start; + } HashType shard_hash = raw_hash / kShard; HashType shard_bin = raw_hash % kShard; - HashedKey hashed_key(shard_hash, const_cast<Key*>(&key)); + HashedInKey hashed_in_key(shard_hash, &key); + HashedKey hashed_key; + hashed_key.store_ptr = nullptr; + hashed_key.in_key = &hashed_in_key; MutexLock lock(self, *lock_[shard_bin]); auto it = keys_[shard_bin].find(hashed_key); if (it != keys_[shard_bin].end()) { - return it->second; + DCHECK(it->store_ptr != nullptr); + return it->store_ptr; } - hashed_key.second = new Key(key); + hashed_key.store_ptr = CreateStoreKey(key); + hashed_key.store_hash = shard_hash; keys_[shard_bin].insert(hashed_key); - return hashed_key.second; + return hashed_key.store_ptr; } - explicit DedupeSet(const char* set_name) { + explicit DedupeSet(const char* set_name, SwapAllocator<void>& alloc) + : allocator_(alloc), hash_time_(0) { for (HashType i = 0; i < kShard; ++i) { std::ostringstream oss; oss << set_name << " lock " << i; @@ -70,15 +109,59 @@ class DedupeSet { } ~DedupeSet() { - for (HashType i = 0; i < kShard; ++i) { - STLDeleteValues(&keys_[i]); + // Have to manually free all pointers. 
+ for (auto& shard : keys_) { + for (const auto& hashed_key : shard) { + DCHECK(hashed_key.store_ptr != nullptr); + DeleteStoreKey(hashed_key.store_ptr); + } + } + } + + std::string DumpStats() const { + size_t collision_sum = 0; + size_t collision_max = 0; + for (HashType shard = 0; shard < kShard; ++shard) { + HashType last_hash = 0; + size_t collision_cur_max = 0; + for (const HashedKey& key : keys_[shard]) { + DCHECK(key.store_ptr != nullptr); + if (key.store_hash == last_hash) { + collision_cur_max++; + if (collision_cur_max > 1) { + collision_sum++; + if (collision_cur_max > collision_max) { + collision_max = collision_cur_max; + } + } + } else { + collision_cur_max = 1; + last_hash = key.store_hash; + } + } } + return StringPrintf("%zu collisions, %zu max bucket size, %" PRIu64 " ns hash time", + collision_sum, collision_max, hash_time_); } private: + StoreKey* CreateStoreKey(const InKey& key) { + StoreKey* ret = allocator_.allocate(1); + allocator_.construct(ret, key.begin(), key.end(), allocator_); + return ret; + } + + void DeleteStoreKey(StoreKey* key) { + SwapAllocator<StoreKey> alloc(allocator_); + alloc.destroy(key); + alloc.deallocate(key, 1); + } + std::string lock_name_[kShard]; std::unique_ptr<Mutex> lock_[kShard]; std::set<HashedKey, Comparator> keys_[kShard]; + SwapAllocator<StoreKey> allocator_; + uint64_t hash_time_; DISALLOW_COPY_AND_ASSIGN(DedupeSet); }; diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc index 8abe6debc1..637964e484 100644 --- a/compiler/utils/dedupe_set_test.cc +++ b/compiler/utils/dedupe_set_test.cc @@ -15,6 +15,10 @@ */ #include "dedupe_set.h" + +#include <algorithm> +#include <cstdio> + #include "gtest/gtest.h" #include "thread-inl.h" @@ -35,19 +39,22 @@ class DedupeHashFunc { TEST(DedupeSetTest, Test) { Thread* self = Thread::Current(); typedef std::vector<uint8_t> ByteArray; - DedupeSet<ByteArray, size_t, DedupeHashFunc> deduplicator("test"); - ByteArray* array1; + SwapAllocator<void> swap(nullptr); + DedupeSet<ByteArray, SwapVector<uint8_t>, size_t, DedupeHashFunc> deduplicator("test", swap); + SwapVector<uint8_t>* array1; { ByteArray test1; test1.push_back(10); test1.push_back(20); test1.push_back(30); test1.push_back(45); + array1 = deduplicator.Add(self, test1); - ASSERT_EQ(test1, *array1); + ASSERT_NE(array1, nullptr); + ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array1->begin())); } - ByteArray* array2; + SwapVector<uint8_t>* array2; { ByteArray test1; test1.push_back(10); @@ -56,10 +63,10 @@ TEST(DedupeSetTest, Test) { test1.push_back(45); array2 = deduplicator.Add(self, test1); ASSERT_EQ(array2, array1); - ASSERT_EQ(test1, *array2); + ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array2->begin())); } - ByteArray* array3; + SwapVector<uint8_t>* array3; { ByteArray test1; test1.push_back(10); @@ -67,8 +74,8 @@ TEST(DedupeSetTest, Test) { test1.push_back(30); test1.push_back(47); array3 = deduplicator.Add(self, test1); - ASSERT_NE(array3, &test1); - ASSERT_EQ(test1, *array3); + ASSERT_NE(array3, nullptr); + ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array3->begin())); } } diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc new file mode 100644 index 0000000000..325ee4fa01 --- /dev/null +++ b/compiler/utils/swap_space.cc @@ -0,0 +1,211 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "swap_space.h" + +#include <algorithm> +#include <numeric> + +#include "base/logging.h" +#include "base/macros.h" +#include "base/mutex.h" +#include "thread-inl.h" + +namespace art { + +// The chunk size by which the swap file is increased and mapped. +static constexpr size_t kMininumMapSize = 16 * MB; + +static constexpr bool kCheckFreeMaps = false; + +template <typename FreeBySizeSet> +static void DumpFreeMap(const FreeBySizeSet& free_by_size) { + size_t last_size = static_cast<size_t>(-1); + for (const auto& entry : free_by_size) { + if (last_size != entry.first) { + last_size = entry.first; + LOG(INFO) << "Size " << last_size; + } + LOG(INFO) << " 0x" << std::hex << entry.second->Start() + << " size=" << std::dec << entry.second->size; + } +} + +template <typename FreeByStartSet, typename FreeBySizeSet> +static void RemoveChunk(FreeByStartSet* free_by_start, + FreeBySizeSet* free_by_size, + typename FreeBySizeSet::const_iterator free_by_size_pos) { + auto free_by_start_pos = free_by_size_pos->second; + free_by_size->erase(free_by_size_pos); + free_by_start->erase(free_by_start_pos); +} + +template <typename FreeByStartSet, typename FreeBySizeSet> +static void InsertChunk(FreeByStartSet* free_by_start, + FreeBySizeSet* free_by_size, + const SpaceChunk& chunk) { + DCHECK_NE(chunk.size, 0u); + auto insert_result = free_by_start->insert(chunk); + DCHECK(insert_result.second); + free_by_size->emplace(chunk.size, insert_result.first); +} + +SwapSpace::SwapSpace(int fd, size_t initial_size) + : fd_(fd), + size_(0), + lock_("SwapSpace lock", static_cast<LockLevel>(LockLevel::kDefaultMutexLevel - 1)) { + // Assume that the file is unlinked. + + InsertChunk(&free_by_start_, &free_by_size_, NewFileChunk(initial_size)); +} + +SwapSpace::~SwapSpace() { + // All arenas are backed by the same file. Just close the descriptor. + close(fd_); +} + +template <typename FreeByStartSet, typename FreeBySizeSet> +static size_t CollectFree(const FreeByStartSet& free_by_start, const FreeBySizeSet& free_by_size) { + if (free_by_start.size() != free_by_size.size()) { + LOG(FATAL) << "Size: " << free_by_start.size() << " vs " << free_by_size.size(); + } + + // Calculate over free_by_size. + size_t sum1 = 0; + for (const auto& entry : free_by_size) { + sum1 += entry.second->size; + } + + // Calculate over free_by_start. + size_t sum2 = 0; + for (const auto& entry : free_by_start) { + sum2 += entry.size; + } + + if (sum1 != sum2) { + LOG(FATAL) << "Sum: " << sum1 << " vs " << sum2; + } + return sum1; +} + +void* SwapSpace::Alloc(size_t size) { + MutexLock lock(Thread::Current(), lock_); + size = RoundUp(size, 8U); + + // Check the free list for something that fits. + // TODO: Smarter implementation. Global biggest chunk, ... + SpaceChunk old_chunk; + auto it = free_by_start_.empty() + ? free_by_size_.end() + : free_by_size_.lower_bound(FreeBySizeEntry { size, free_by_start_.begin() }); + if (it != free_by_size_.end()) { + old_chunk = *it->second; + RemoveChunk(&free_by_start_, &free_by_size_, it); + } else { + // Not a big enough free chunk, need to increase file size. 
+ old_chunk = NewFileChunk(size); + } + + void* ret = old_chunk.ptr; + + if (old_chunk.size != size) { + // Insert the remainder. + SpaceChunk new_chunk = { old_chunk.ptr + size, old_chunk.size - size }; + InsertChunk(&free_by_start_, &free_by_size_, new_chunk); + } + + return ret; +} + +SpaceChunk SwapSpace::NewFileChunk(size_t min_size) { +#if !defined(__APPLE__) + size_t next_part = std::max(RoundUp(min_size, kPageSize), RoundUp(kMininumMapSize, kPageSize)); + int result = TEMP_FAILURE_RETRY(ftruncate64(fd_, size_ + next_part)); + if (result != 0) { + PLOG(FATAL) << "Unable to increase swap file."; + } + uint8_t* ptr = reinterpret_cast<uint8_t*>( + mmap(nullptr, next_part, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, size_)); + if (ptr == MAP_FAILED) { + LOG(ERROR) << "Unable to mmap new swap file chunk."; + LOG(ERROR) << "Current size: " << size_ << " requested: " << next_part << "/" << min_size; + LOG(ERROR) << "Free list:"; + MutexLock lock(Thread::Current(), lock_); + DumpFreeMap(free_by_size_); + LOG(ERROR) << "In free list: " << CollectFree(free_by_start_, free_by_size_); + LOG(FATAL) << "Aborting..."; + } + size_ += next_part; + SpaceChunk new_chunk = {ptr, next_part}; + maps_.push_back(new_chunk); + return new_chunk; +#else + UNUSED(min_size, kMininumMapSize); + LOG(FATAL) << "No swap file support on the Mac."; + UNREACHABLE(); +#endif +} + +// TODO: Full coalescing. +void SwapSpace::Free(void* ptrV, size_t size) { + MutexLock lock(Thread::Current(), lock_); + size = RoundUp(size, 8U); + + size_t free_before = 0; + if (kCheckFreeMaps) { + free_before = CollectFree(free_by_start_, free_by_size_); + } + + SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptrV), size }; + auto it = free_by_start_.lower_bound(chunk); + if (it != free_by_start_.begin()) { + auto prev = it; + --prev; + CHECK_LE(prev->End(), chunk.Start()); + if (prev->End() == chunk.Start()) { + // Merge *prev with this chunk. + chunk.size += prev->size; + chunk.ptr -= prev->size; + auto erase_pos = free_by_size_.find(FreeBySizeEntry { prev->size, prev }); + DCHECK(erase_pos != free_by_size_.end()); + RemoveChunk(&free_by_start_, &free_by_size_, erase_pos); + // "prev" is invalidated but "it" remains valid. + } + } + if (it != free_by_start_.end()) { + CHECK_LE(chunk.End(), it->Start()); + if (chunk.End() == it->Start()) { + // Merge *it with this chunk. + chunk.size += it->size; + auto erase_pos = free_by_size_.find(FreeBySizeEntry { it->size, it }); + DCHECK(erase_pos != free_by_size_.end()); + RemoveChunk(&free_by_start_, &free_by_size_, erase_pos); + // "it" is invalidated but we don't need it anymore. + } + } + InsertChunk(&free_by_start_, &free_by_size_, chunk); + + if (kCheckFreeMaps) { + size_t free_after = CollectFree(free_by_start_, free_by_size_); + + if (free_after != free_before + size) { + DumpFreeMap(free_by_size_); + CHECK_EQ(free_after, free_before + size) << "Should be " << size << " difference from " << free_before; + } + } +} + +} // namespace art diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h new file mode 100644 index 0000000000..2d0d77af78 --- /dev/null +++ b/compiler/utils/swap_space.h @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
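Note on SwapSpace::Alloc/Free above: the free list is indexed twice, once by chunk start (so Free can merge with its neighbours) and once by (size, start) (so Alloc can take the smallest chunk that still fits and reinsert the remainder). A stripped-down standalone sketch of that structure, independent of the ART code:

    #include <cstddef>
    #include <cstdint>
    #include <set>
    #include <utility>

    struct Chunk { uint8_t* ptr; size_t size; };
    struct ByPtr {
      bool operator()(const Chunk& a, const Chunk& b) const { return a.ptr < b.ptr; }
    };
    using ByStart = std::set<Chunk, ByPtr>;
    using SizeEntry = std::pair<size_t, ByStart::const_iterator>;
    struct BySizeThenStart {
      bool operator()(const SizeEntry& a, const SizeEntry& b) const {
        return a.first != b.first ? a.first < b.first : a.second->ptr < b.second->ptr;
      }
    };
    using BySize = std::set<SizeEntry, BySizeThenStart>;

    // Best fit: smallest free chunk of at least `size` bytes; the tail goes back on the list.
    uint8_t* Alloc(ByStart* by_start, BySize* by_size, size_t size) {
      if (by_size->empty()) {
        return nullptr;  // the real code grows the backing file here (NewFileChunk)
      }
      auto it = by_size->lower_bound(SizeEntry{size, by_start->begin()});
      if (it == by_size->end()) {
        return nullptr;  // nothing large enough; again, the real code maps a new file chunk
      }
      Chunk chunk = *it->second;
      by_start->erase(it->second);
      by_size->erase(it);
      if (chunk.size > size) {
        Chunk rest{chunk.ptr + size, chunk.size - size};
        auto pos = by_start->insert(rest).first;
        by_size->emplace(rest.size, pos);
      }
      return chunk.ptr;
    }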
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_SWAP_SPACE_H_ +#define ART_COMPILER_UTILS_SWAP_SPACE_H_ + +#include <cstdlib> +#include <list> +#include <set> +#include <stdint.h> +#include <stddef.h> + +#include "base/logging.h" +#include "base/macros.h" +#include "base/mutex.h" +#include "mem_map.h" +#include "utils.h" +#include "utils/debug_stack.h" + +namespace art { + +// Chunk of space. +struct SpaceChunk { + uint8_t* ptr; + size_t size; + + uintptr_t Start() const { + return reinterpret_cast<uintptr_t>(ptr); + } + uintptr_t End() const { + return reinterpret_cast<uintptr_t>(ptr) + size; + } +}; + +inline bool operator==(const SpaceChunk& lhs, const SpaceChunk& rhs) { + return (lhs.size == rhs.size) && (lhs.ptr == rhs.ptr); +} + +class SortChunkByPtr { + public: + bool operator()(const SpaceChunk& a, const SpaceChunk& b) const { + return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr); + } +}; + +// An arena pool that creates arenas backed by an mmaped file. +class SwapSpace { + public: + SwapSpace(int fd, size_t initial_size); + ~SwapSpace(); + void* Alloc(size_t size) LOCKS_EXCLUDED(lock_); + void Free(void* ptr, size_t size) LOCKS_EXCLUDED(lock_); + + size_t GetSize() { + return size_; + } + + private: + SpaceChunk NewFileChunk(size_t min_size); + + int fd_; + size_t size_; + std::list<SpaceChunk> maps_; + + // NOTE: Boost.Bimap would be useful for the two following members. + + // Map start of a free chunk to its size. + typedef std::set<SpaceChunk, SortChunkByPtr> FreeByStartSet; + FreeByStartSet free_by_start_ GUARDED_BY(lock_); + + // Map size to an iterator to free_by_start_'s entry. 
+ typedef std::pair<size_t, FreeByStartSet::const_iterator> FreeBySizeEntry; + struct FreeBySizeComparator { + bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) { + if (lhs.first != rhs.first) { + return lhs.first < rhs.first; + } else { + return lhs.second->Start() < rhs.second->Start(); + } + } + }; + typedef std::set<FreeBySizeEntry, FreeBySizeComparator> FreeBySizeSet; + FreeBySizeSet free_by_size_ GUARDED_BY(lock_); + + mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + DISALLOW_COPY_AND_ASSIGN(SwapSpace); +}; + +template <typename T> class SwapAllocator; + +template <> +class SwapAllocator<void> { + public: + typedef void value_type; + typedef void* pointer; + typedef const void* const_pointer; + + template <typename U> + struct rebind { + typedef SwapAllocator<U> other; + }; + + explicit SwapAllocator(SwapSpace* swap_space) : swap_space_(swap_space) {} + + template <typename U> + SwapAllocator(const SwapAllocator<U>& other) : swap_space_(other.swap_space_) {} + + SwapAllocator(const SwapAllocator& other) = default; + SwapAllocator& operator=(const SwapAllocator& other) = default; + ~SwapAllocator() = default; + + private: + SwapSpace* swap_space_; + + template <typename U> + friend class SwapAllocator; +}; + +template <typename T> +class SwapAllocator { + public: + typedef T value_type; + typedef T* pointer; + typedef T& reference; + typedef const T* const_pointer; + typedef const T& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + + template <typename U> + struct rebind { + typedef SwapAllocator<U> other; + }; + + explicit SwapAllocator(SwapSpace* swap_space) : swap_space_(swap_space) {} + + template <typename U> + SwapAllocator(const SwapAllocator<U>& other) : swap_space_(other.swap_space_) {} + + SwapAllocator(const SwapAllocator& other) = default; + SwapAllocator& operator=(const SwapAllocator& other) = default; + ~SwapAllocator() = default; + + size_type max_size() const { + return static_cast<size_type>(-1) / sizeof(T); + } + + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } + + pointer allocate(size_type n, SwapAllocator<void>::pointer hint ATTRIBUTE_UNUSED = nullptr) { + DCHECK_LE(n, max_size()); + if (swap_space_ == nullptr) { + return reinterpret_cast<T*>(malloc(n * sizeof(T))); + } else { + return reinterpret_cast<T*>(swap_space_->Alloc(n * sizeof(T))); + } + } + void deallocate(pointer p, size_type n) { + if (swap_space_ == nullptr) { + free(p); + } else { + swap_space_->Free(p, n * sizeof(T)); + } + } + + void construct(pointer p, const_reference val) { + new (static_cast<void*>(p)) value_type(val); + } + template <class U, class... Args> + void construct(U* p, Args&&... 
args) { + ::new (static_cast<void*>(p)) U(std::forward<Args>(args)...); + } + void destroy(pointer p) { + p->~value_type(); + } + + inline bool operator==(SwapAllocator const& other) { + return swap_space_ == other.swap_space_; + } + inline bool operator!=(SwapAllocator const& other) { + return !operator==(other); + } + + private: + SwapSpace* swap_space_; + + template <typename U> + friend class SwapAllocator; +}; + +template <typename T> +using SwapVector = std::vector<T, SwapAllocator<T>>; +template <typename T, typename Comparator> +using SwapSet = std::set<T, Comparator, SwapAllocator<T>>; + +} // namespace art + +#endif // ART_COMPILER_UTILS_SWAP_SPACE_H_ diff --git a/compiler/utils/swap_space_test.cc b/compiler/utils/swap_space_test.cc new file mode 100644 index 0000000000..bf50ac3209 --- /dev/null +++ b/compiler/utils/swap_space_test.cc @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/swap_space.h" + +#include <cstdio> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "gtest/gtest.h" + +#include "base/unix_file/fd_file.h" +#include "common_runtime_test.h" +#include "os.h" + +namespace art { + +class SwapSpaceTest : public CommonRuntimeTest { +}; + +static void SwapTest(bool use_file) { + ScratchFile scratch; + int fd = scratch.GetFd(); + unlink(scratch.GetFilename().c_str()); + + SwapSpace pool(fd, 1 * MB); + SwapAllocator<void> alloc(use_file ? &pool : nullptr); + + SwapVector<int32_t> v(alloc); + v.reserve(1000000); + for (int32_t i = 0; i < 1000000; ++i) { + v.push_back(i); + EXPECT_EQ(i, v[i]); + } + + SwapVector<int32_t> v2(alloc); + v2.reserve(1000000); + for (int32_t i = 0; i < 1000000; ++i) { + v2.push_back(i); + EXPECT_EQ(i, v2[i]); + } + + SwapVector<int32_t> v3(alloc); + v3.reserve(500000); + for (int32_t i = 0; i < 1000000; ++i) { + v3.push_back(i); + EXPECT_EQ(i, v2[i]); + } + + // Verify contents. 
+ for (int32_t i = 0; i < 1000000; ++i) { + EXPECT_EQ(i, v[i]); + EXPECT_EQ(i, v2[i]); + EXPECT_EQ(i, v3[i]); + } + + scratch.Close(); +} + +TEST_F(SwapSpaceTest, Memory) { + SwapTest(false); +} + +TEST_F(SwapSpaceTest, Swap) { + SwapTest(true); +} + +} // namespace art diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index f0353f6cd2..83584a2dcb 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -443,6 +443,27 @@ void X86Assembler::movsd(XmmRegister dst, XmmRegister src) { } +void X86Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::punpckldq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x62); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::addsd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); @@ -1351,38 +1372,6 @@ void X86Assembler::LoadDoubleConstant(XmmRegister dst, double value) { } -void X86Assembler::FloatNegate(XmmRegister f) { - static const struct { - uint32_t a; - uint32_t b; - uint32_t c; - uint32_t d; - } float_negate_constant __attribute__((aligned(16))) = - { 0x80000000, 0x00000000, 0x80000000, 0x00000000 }; - xorps(f, Address::Absolute(reinterpret_cast<uintptr_t>(&float_negate_constant))); -} - - -void X86Assembler::DoubleNegate(XmmRegister d) { - static const struct { - uint64_t a; - uint64_t b; - } double_negate_constant __attribute__((aligned(16))) = - {0x8000000000000000LL, 0x8000000000000000LL}; - xorpd(d, Address::Absolute(reinterpret_cast<uintptr_t>(&double_negate_constant))); -} - - -void X86Assembler::DoubleAbs(XmmRegister reg) { - static const struct { - uint64_t a; - uint64_t b; - } double_abs_constant __attribute__((aligned(16))) = - {0x7FFFFFFFFFFFFFFFLL, 0x7FFFFFFFFFFFFFFFLL}; - andpd(reg, Address::Absolute(reinterpret_cast<uintptr_t>(&double_abs_constant))); -} - - void X86Assembler::Align(int alignment, int offset) { CHECK(IsPowerOfTwo(alignment)); // Emit nop instruction until the real position is aligned. 
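Note: psrlq and punpckldq are added above (and the hand-rolled FloatNegate/DoubleNegate/DoubleAbs helpers removed); together with the existing movd they let the back end assemble or split a 64-bit value held in an XMM register. A usage sketch, assuming an X86Assembler* named "assembler":

    assembler->movd(x86::XMM0, x86::EAX);               // xmm0[31:0]  = low half
    assembler->movd(x86::XMM1, x86::EDX);               // xmm1[31:0]  = high half
    assembler->punpckldq(x86::XMM0, x86::XMM1);         // xmm0[63:0]  = high:low (interleave low dwords)
    assembler->psrlq(x86::XMM0, x86::Immediate(32));    // each 64-bit lane >>= 32, low dword = high half again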
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 9fecf1edf0..ad070673e8 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -274,6 +274,9 @@ class X86Assembler FINAL : public Assembler { void movsd(const Address& dst, XmmRegister src); void movsd(XmmRegister dst, XmmRegister src); + void psrlq(XmmRegister reg, const Immediate& shift_count); + void punpckldq(XmmRegister dst, XmmRegister src); + void addsd(XmmRegister dst, XmmRegister src); void addsd(XmmRegister dst, const Address& src); void subsd(XmmRegister dst, XmmRegister src); @@ -444,11 +447,6 @@ class X86Assembler FINAL : public Assembler { void LoadLongConstant(XmmRegister dst, int64_t value); void LoadDoubleConstant(XmmRegister dst, double value); - void DoubleNegate(XmmRegister d); - void FloatNegate(XmmRegister f); - - void DoubleAbs(XmmRegister reg); - void LockCmpxchgl(const Address& address, Register reg) { lock()->cmpxchgl(address, reg); } diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index d901673691..fccb510afb 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -105,6 +105,18 @@ TEST_F(AssemblerX86Test, Movl) { DriverStr(expected, "movl"); } +TEST_F(AssemblerX86Test, psrlq) { + GetAssembler()->psrlq(x86::XMM0, CreateImmediate(32)); + const char* expected = "psrlq $0x20, %xmm0\n"; + DriverStr(expected, "psrlq"); +} + +TEST_F(AssemblerX86Test, punpckldq) { + GetAssembler()->punpckldq(x86::XMM0, x86::XMM1); + const char* expected = "punpckldq %xmm1, %xmm0\n"; + DriverStr(expected, "punpckldq"); +} + TEST_F(AssemblerX86Test, LoadLongConstant) { GetAssembler()->LoadLongConstant(x86::XMM0, 51); const char* expected = diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 2a6c58e128..d843a7213f 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1768,38 +1768,6 @@ void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) { } -void X86_64Assembler::FloatNegate(XmmRegister f) { - static const struct { - uint32_t a; - uint32_t b; - uint32_t c; - uint32_t d; - } float_negate_constant __attribute__((aligned(16))) = - { 0x80000000, 0x00000000, 0x80000000, 0x00000000 }; - xorps(f, Address::Absolute(reinterpret_cast<uintptr_t>(&float_negate_constant))); -} - - -void X86_64Assembler::DoubleNegate(XmmRegister d) { - static const struct { - uint64_t a; - uint64_t b; - } double_negate_constant __attribute__((aligned(16))) = - {0x8000000000000000LL, 0x8000000000000000LL}; - xorpd(d, Address::Absolute(reinterpret_cast<uintptr_t>(&double_negate_constant))); -} - - -void X86_64Assembler::DoubleAbs(XmmRegister reg) { - static const struct { - uint64_t a; - uint64_t b; - } double_abs_constant __attribute__((aligned(16))) = - {0x7FFFFFFFFFFFFFFFLL, 0x7FFFFFFFFFFFFFFFLL}; - andpd(reg, Address::Absolute(reinterpret_cast<uintptr_t>(&double_abs_constant))); -} - - void X86_64Assembler::Align(int alignment, int offset) { CHECK(IsPowerOfTwo(alignment)); // Emit nop instruction until the real position is aligned. 
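Note on the Address::Init change below: ModRM only keeps the low three bits of the base register, so R13 aliases RBP there (0b101), and mod=00 with base 0b101 is decoded as disp32/RIP-relative rather than [r13]. Comparing LowBits() against RBP makes R13 take the disp8-with-zero form just like RBP, which the new movl tests against (%R13) exercise. A tiny standalone illustration of the rule (hypothetical helper, not ART code):

    #include <cassert>
    #include <cstdint>

    // True if [base + 0] may use the mod=00 ModRM form; encodings with low bits 0b101
    // (RBP, R13) must instead emit mod=01 with an explicit zero displacement byte.
    bool CanUseNoDispForm(uint8_t reg_encoding /* 0..15 */) {
      return (reg_encoding & 7) != 5;
    }

    int main() {
      assert(CanUseNoDispForm(0));    // RAX
      assert(!CanUseNoDispForm(5));   // RBP
      assert(!CanUseNoDispForm(13));  // R13 -- the case this patch fixes
      return 0;
    }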
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 51d1de2c0f..ac8bc9ab49 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -178,7 +178,7 @@ class Address : public Operand { } void Init(CpuRegister base_in, int32_t disp) { - if (disp == 0 && base_in.AsRegister() != RBP) { + if (disp == 0 && base_in.LowBits() != RBP) { SetModRM(0, base_in); if (base_in.AsRegister() == RSP) { SetSIB(TIMES_1, CpuRegister(RSP), base_in); @@ -208,7 +208,7 @@ class Address : public Operand { Address(CpuRegister base_in, CpuRegister index_in, ScaleFactor scale_in, int32_t disp) { CHECK_NE(index_in.AsRegister(), RSP); // Illegal addressing mode. - if (disp == 0 && base_in.AsRegister() != RBP) { + if (disp == 0 && base_in.LowBits() != RBP) { SetModRM(0, CpuRegister(RSP)); SetSIB(scale_in, index_in, base_in); } else if (disp >= -128 && disp <= 127) { @@ -512,11 +512,6 @@ class X86_64Assembler FINAL : public Assembler { void LoadDoubleConstant(XmmRegister dst, double value); - void DoubleNegate(XmmRegister d); - void FloatNegate(XmmRegister f); - - void DoubleAbs(XmmRegister reg); - void LockCmpxchgl(const Address& address, CpuRegister reg) { lock()->cmpxchgl(address, reg); } diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index c8e923c9d6..b8d724d771 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -536,10 +536,16 @@ TEST_F(AssemblerX86_64Test, Movl) { x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12)); GetAssembler()->movl(x86_64::CpuRegister(x86_64::R8), x86_64::Address( x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12)); + GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address( + x86_64::CpuRegister(x86_64::R13), 0)); + GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address( + x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0)); const char* expected = "movl 0xc(%RDI,%RBX,4), %EAX\n" "movl 0xc(%RDI,%R9,4), %EAX\n" - "movl 0xc(%RDI,%R9,4), %R8d\n"; + "movl 0xc(%RDI,%R9,4), %R8d\n" + "movl (%R13), %EAX\n" + "movl (%R13,%R9,1), %EAX\n"; DriverStr(expected, "movl"); } diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 2cbfffaea4..63009bf25e 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -248,6 +248,12 @@ static void UsageError(const char* fmt, ...) { UsageError(" Used to specify a pass specific option. The setting itself must be integer."); UsageError(" Separator used between options is a comma."); UsageError(""); + UsageError(" --swap-file=<file-name>: specifies a file to use for swap."); + UsageError(" Example: --swap-file=/data/tmp/swap.001"); + UsageError(""); + UsageError(" --swap-fd=<file-descriptor>: specifies a file to use for swap (by descriptor)."); + UsageError(" Example: --swap-fd=10"); + UsageError(""); std::cerr << "See log for usage error information\n"; exit(EXIT_FAILURE); } @@ -393,6 +399,25 @@ static void ParseDouble(const std::string& option, char after_char, double min, *parsed_value = value; } +static constexpr size_t kMinDexFilesForSwap = 2; +static constexpr size_t kMinDexFileCumulativeSizeForSwap = 20 * MB; + +static bool UseSwap(bool is_image, std::vector<const DexFile*>& dex_files) { + if (is_image) { + // Don't use swap, we know generation should succeed, and we don't want to slow it down. 
+ return false; + } + if (dex_files.size() < kMinDexFilesForSwap) { + // If there are less dex files than the threshold, assume it's gonna be fine. + return false; + } + size_t dex_files_size = 0; + for (const auto* dex_file : dex_files) { + dex_files_size += dex_file->GetHeader().file_size_; + } + return dex_files_size >= kMinDexFileCumulativeSizeForSwap; +} + class Dex2Oat FINAL { public: explicit Dex2Oat(TimingLogger* timings) : @@ -416,6 +441,7 @@ class Dex2Oat FINAL { dump_passes_(false), dump_timing_(false), dump_slow_timing_(kIsDebugBuild), + swap_fd_(-1), timings_(timings) {} ~Dex2Oat() { @@ -684,6 +710,16 @@ class Dex2Oat FINAL { << "failures."; init_failure_output_.reset(); } + } else if (option.starts_with("--swap-file=")) { + swap_file_name_ = option.substr(strlen("--swap-file=")).data(); + } else if (option.starts_with("--swap-fd=")) { + const char* swap_fd_str = option.substr(strlen("--swap-fd=")).data(); + if (!ParseInt(swap_fd_str, &swap_fd_)) { + Usage("Failed to parse --swap-fd argument '%s' as an integer", swap_fd_str); + } + if (swap_fd_ < 0) { + Usage("--swap-fd passed a negative value %d", swap_fd_); + } } else { Usage("Unknown argument %s", option.data()); } @@ -918,7 +954,8 @@ class Dex2Oat FINAL { } } - // Check whether the oat output file is writable, and open it for later. + // Check whether the oat output file is writable, and open it for later. Also open a swap file, + // if a name is given. bool OpenFile() { bool create_file = !oat_unstripped_.empty(); // as opposed to using open file descriptor if (create_file) { @@ -942,6 +979,27 @@ class Dex2Oat FINAL { oat_file_->Erase(); return false; } + + // Swap file handling. + // + // If the swap fd is not -1, we assume this is the file descriptor of an open but unlinked file + // that we can use for swap. + // + // If the swap fd is -1 and we have a swap-file string, open the given file as a swap file. We + // will immediately unlink to satisfy the swap fd assumption. + if (swap_fd_ == -1 && !swap_file_name_.empty()) { + std::unique_ptr<File> swap_file(OS::CreateEmptyFile(swap_file_name_.c_str())); + if (swap_file.get() == nullptr) { + PLOG(ERROR) << "Failed to create swap file: " << swap_file_name_; + return false; + } + swap_fd_ = swap_file->Fd(); + swap_file->MarkUnchecked(); // We don't we to track this, it will be unlinked immediately. + swap_file->DisableAutoClose(); // We'll handle it ourselves, the File object will be + // released immediately. + unlink(swap_file_name_.c_str()); + } + return true; } @@ -1085,6 +1143,18 @@ class Dex2Oat FINAL { } } + // If we use a swap file, ensure we are above the threshold to make it necessary. + if (swap_fd_ != -1) { + if (!UseSwap(image_, dex_files_)) { + close(swap_fd_); + swap_fd_ = -1; + LOG(INFO) << "Decided to run without swap."; + } else { + LOG(INFO) << "Accepted running with swap."; + } + } + // Note that dex2oat won't close the swap_fd_. The compiler driver's swap space will do that. + /* * If we're not in interpret-only or verify-none mode, go ahead and compile small applications. * Don't bother to check if we're doing the image. 
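Note: the swap handling above assumes --swap-fd names an open but already-unlinked file, and --swap-file merely automates creating and unlinking one before compilation starts. A hypothetical caller-side sketch (path and helper name assumed, not part of this change):

    #include <fcntl.h>
    #include <unistd.h>

    // Create a scratch file, unlink it immediately, and hand dex2oat only the descriptor
    // (as --swap-fd=<fd>); the compiler driver's swap space closes the fd when it is done.
    int PrepareSwapFd() {
      const char* path = "/data/local/tmp/dex2oat.swap";
      int fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0600);
      if (fd == -1) {
        return -1;
      }
      unlink(path);  // from here on the file exists only through the descriptor
      return fd;
    }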
@@ -1143,6 +1213,7 @@ class Dex2Oat FINAL { dump_stats_, dump_passes_, compiler_phases_timings_.get(), + swap_fd_, profile_file_)); driver_->CompileAll(class_loader, dex_files_, timings_); @@ -1591,6 +1662,8 @@ class Dex2Oat FINAL { bool dump_passes_; bool dump_timing_; bool dump_slow_timing_; + std::string swap_file_name_; + int swap_fd_; std::string profile_file_; // Profile file to use TimingLogger* timings_; std::unique_ptr<CumulativeLogger> compiler_phases_timings_; diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc index 52fd736cdb..31e653bf92 100644 --- a/disassembler/disassembler_arm.cc +++ b/disassembler/disassembler_arm.cc @@ -21,6 +21,7 @@ #include <ostream> #include <sstream> +#include "arch/arm/registers_arm.h" #include "base/logging.h" #include "base/stringprintf.h" #include "thread.h" @@ -148,15 +149,15 @@ struct ThumbRegister : ArmRegister { ThumbRegister(uint16_t instruction, uint16_t at_bit) : ArmRegister((instruction >> at_bit) & 0x7) {} }; -struct Rm { - explicit Rm(uint32_t instruction) : shift((instruction >> 4) & 0xff), rm(instruction & 0xf) {} - uint32_t shift; +struct RmLslImm2 { + explicit RmLslImm2(uint32_t instr) : imm2((instr >> 4) & 0x3), rm(instr & 0xf) {} + uint32_t imm2; ArmRegister rm; }; -std::ostream& operator<<(std::ostream& os, const Rm& r) { +std::ostream& operator<<(std::ostream& os, const RmLslImm2& r) { os << r.rm; - if (r.shift != 0) { - os << "-shift-" << r.shift; // TODO + if (r.imm2 != 0) { + os << ", lsl #" << r.imm2; } return os; } @@ -397,7 +398,74 @@ static uint64_t VFPExpand64(uint32_t imm8) { uint64_t bit_a = (imm8 >> 7) & 1; uint64_t bit_b = (imm8 >> 6) & 1; uint64_t slice = imm8 & 0x3f; - return (bit_a << 31) | ((UINT64_C(1) << 62) - (bit_b << 54)) | (slice << 48); + return (bit_a << 63) | ((UINT64_C(1) << 62) - (bit_b << 54)) | (slice << 48); +} + +enum T2LitType { + kT2LitInvalid, + kT2LitUByte, + kT2LitSByte, + kT2LitUHalf, + kT2LitSHalf, + kT2LitUWord, + kT2LitSWord, + kT2LitHexWord, + kT2LitULong, + kT2LitSLong, + kT2LitHexLong, +}; +std::ostream& operator<<(std::ostream& os, T2LitType type) { + return os << static_cast<int>(type); +} + +void DumpThumb2Literal(std::ostream& args, const uint8_t* instr_ptr, uint32_t U, uint32_t imm32, + T2LitType type) { + // Literal offsets (imm32) are not required to be aligned so we may need unaligned access. + typedef const int16_t unaligned_int16_t __attribute__ ((aligned (1))); + typedef const uint16_t unaligned_uint16_t __attribute__ ((aligned (1))); + typedef const int32_t unaligned_int32_t __attribute__ ((aligned (1))); + typedef const uint32_t unaligned_uint32_t __attribute__ ((aligned (1))); + typedef const int64_t unaligned_int64_t __attribute__ ((aligned (1))); + typedef const uint64_t unaligned_uint64_t __attribute__ ((aligned (1))); + + uintptr_t pc = RoundDown(reinterpret_cast<intptr_t>(instr_ptr) + 4, 4); + uintptr_t lit_adr = U ? 
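Note on the VFPExpand64 change above: the sign of the expanded 64-bit VFP immediate belongs in bit 63, not bit 31 where the old shift dropped it into the fraction; the rest of the expression already followed the ARM ARM VFPExpandImm layout (NOT(b) in bit 62, b replicated through bits 61..54, cdefgh in bits 53..48). A quick standalone sanity check of the patched expansion:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint64_t VFPExpand64(uint32_t imm8) {
      uint64_t bit_a = (imm8 >> 7) & 1;
      uint64_t bit_b = (imm8 >> 6) & 1;
      uint64_t slice = imm8 & 0x3f;
      return (bit_a << 63) | ((UINT64_C(1) << 62) - (bit_b << 54)) | (slice << 48);
    }

    int main() {
      // vmov.f64 modified immediates: 0x70 encodes 1.0 and 0x80 encodes -2.0.
      assert(VFPExpand64(0x70) == UINT64_C(0x3FF0000000000000));
      assert(VFPExpand64(0x80) == UINT64_C(0xC000000000000000));
      uint64_t bits = VFPExpand64(0x70);
      double value;
      std::memcpy(&value, &bits, sizeof(value));
      assert(value == 1.0);
      return 0;
    }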
pc + imm32 : pc - imm32; + args << " ; "; + switch (type) { + case kT2LitUByte: + args << *reinterpret_cast<const uint8_t*>(lit_adr); + break; + case kT2LitSByte: + args << *reinterpret_cast<const int8_t*>(lit_adr); + break; + case kT2LitUHalf: + args << *reinterpret_cast<const unaligned_uint16_t*>(lit_adr); + break; + case kT2LitSHalf: + args << *reinterpret_cast<const unaligned_int16_t*>(lit_adr); + break; + case kT2LitUWord: + args << *reinterpret_cast<const unaligned_uint32_t*>(lit_adr); + break; + case kT2LitSWord: + args << *reinterpret_cast<const unaligned_int32_t*>(lit_adr); + break; + case kT2LitHexWord: + args << StringPrintf("0x%08x", *reinterpret_cast<const unaligned_uint32_t*>(lit_adr)); + break; + case kT2LitULong: + args << *reinterpret_cast<const unaligned_uint64_t*>(lit_adr); + break; + case kT2LitSLong: + args << *reinterpret_cast<const unaligned_int64_t*>(lit_adr); + break; + case kT2LitHexLong: + args << StringPrintf("0x%" PRIx64, *reinterpret_cast<unaligned_int64_t*>(lit_adr)); + break; + default: + LOG(FATAL) << "Invalid type: " << type; + break; + } } size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) { @@ -756,10 +824,7 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) args << d << ", [" << Rn << ", #" << ((U == 1) ? "" : "-") << (imm8 << 2) << "]"; if (Rn.r == 15 && U == 1) { - intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr); - lit_adr = RoundDown(lit_adr, 4) + 4 + (imm8 << 2); - typedef const int64_t unaligned_int64_t __attribute__ ((aligned (2))); - args << StringPrintf(" ; 0x%" PRIx64, *reinterpret_cast<unaligned_int64_t*>(lit_adr)); + DumpThumb2Literal(args, instr_ptr, U, imm8 << 2, kT2LitHexLong); } } else if (Rn.r == 13 && W == 1 && U == L) { // VPUSH/VPOP opcode << (L == 1 ? "vpop" : "vpush"); @@ -1227,164 +1292,141 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) break; case 3: switch (op2) { - case 0x00: case 0x02: case 0x04: case 0x06: // 000xxx0 - case 0x08: case 0x09: case 0x0A: case 0x0C: case 0x0E: { - // Store single data item - // |111|11|100|000|0|0000|1111|110000|000000| - // |5 3|21|098|765|4|3 0|5 2|10 6|5 0| - // |---|--|---|---|-|----|----|------|------| - // |332|22|222|222|2|1111|1111|110000|000000| - // |1 9|87|654|321|0|9 6|5 2|10 6|5 0| - // |---|--|---|---|-|----|----|------|------| - // |111|11|000|op3|0| | | op4 | | - uint32_t op3 = (instr >> 21) & 7; - // uint32_t op4 = (instr >> 6) & 0x3F; - switch (op3) { - case 0x0: case 0x4: { - // {ST,LD}RB Rt,[Rn,#+/-imm12] - 111 11 00 0 1 00 0 nnnn tttt 1 PUWii ii iiii - // {ST,LD}RB Rt,[Rn,#+/-imm8] - 111 11 00 0 0 00 0 nnnn tttt 1 PUWii ii iiii - // {ST,LD}RB Rt,[Rn,Rm,lsl #imm2] - 111 11 00 0 0 00 0 nnnn tttt 0 00000 ii mmmm - ArmRegister Rn(instr, 16); - ArmRegister Rt(instr, 12); - opcode << (HasBitSet(instr, 20) ? 
"ldrb" : "strb"); - if (HasBitSet(instr, 23)) { - uint32_t imm12 = instr & 0xFFF; - args << Rt << ", [" << Rn << ",#" << imm12 << "]"; - } else if ((instr & 0x800) != 0) { - uint32_t imm8 = instr & 0xFF; - args << Rt << ", [" << Rn << ",#" << imm8 << "]"; - } else { - uint32_t imm2 = (instr >> 4) & 3; - ArmRegister Rm(instr, 0); - args << Rt << ", [" << Rn << ", " << Rm; - if (imm2 != 0) { - args << ", " << "lsl #" << imm2; - } - args << "]"; - } - break; - } - case 0x1: case 0x5: { - // STRH Rt,[Rn,#+/-imm12] - 111 11 00 0 1 01 0 nnnn tttt 1 PUWii ii iiii - // STRH Rt,[Rn,#+/-imm8] - 111 11 00 0 0 01 0 nnnn tttt 1 PUWii ii iiii - // STRH Rt,[Rn,Rm,lsl #imm2] - 111 11 00 0 0 01 0 nnnn tttt 0 00000 ii mmmm - ArmRegister Rn(instr, 16); - ArmRegister Rt(instr, 12); - opcode << "strh"; - if (HasBitSet(instr, 23)) { - uint32_t imm12 = instr & 0xFFF; - args << Rt << ", [" << Rn << ",#" << imm12 << "]"; - } else if ((instr & 0x800) != 0) { - uint32_t imm8 = instr & 0xFF; - args << Rt << ", [" << Rn << ",#" << imm8 << "]"; - } else { - uint32_t imm2 = (instr >> 4) & 3; - ArmRegister Rm(instr, 0); - args << Rt << ", [" << Rn << ", " << Rm; - if (imm2 != 0) { - args << ", " << "lsl #" << imm2; - } - args << "]"; - } - break; - } - case 0x2: case 0x6: { - ArmRegister Rn(instr, 16); - ArmRegister Rt(instr, 12); - if (op3 == 2) { - if ((instr & 0x800) != 0) { - // STR Rt, [Rn, #imm8] - 111 11 000 010 0 nnnn tttt 1PUWiiiiiiii - uint32_t P = (instr >> 10) & 1; - uint32_t U = (instr >> 9) & 1; - uint32_t W = (instr >> 8) & 1; - uint32_t imm8 = instr & 0xFF; - int32_t imm32 = (imm8 << 24) >> 24; // sign-extend imm8 - if (Rn.r == 13 && P == 1 && U == 0 && W == 1 && imm32 == 4) { - opcode << "push"; - args << "{" << Rt << "}"; - } else if (Rn.r == 15 || (P == 0 && W == 0)) { - opcode << "UNDEFINED"; - } else { - if (P == 1 && U == 1 && W == 0) { - opcode << "strt"; - } else { - opcode << "str"; - } - args << Rt << ", [" << Rn; - if (P == 0 && W == 1) { - args << "], #" << imm32; - } else { - args << ", #" << imm32 << "]"; - if (W == 1) { - args << "!"; - } - } - } - } else { - // STR Rt, [Rn, Rm, LSL #imm2] - 111 11 000 010 0 nnnn tttt 000000iimmmm - ArmRegister Rm(instr, 0); - uint32_t imm2 = (instr >> 4) & 3; - opcode << "str.w"; - args << Rt << ", [" << Rn << ", " << Rm; - if (imm2 != 0) { - args << ", lsl #" << imm2; - } - args << "]"; - } - } else if (op3 == 6) { - // STR.W Rt, [Rn, #imm12] - 111 11 000 110 0 nnnn tttt iiiiiiiiiiii - uint32_t imm12 = instr & 0xFFF; - opcode << "str.w"; - args << Rt << ", [" << Rn << ", #" << imm12 << "]"; - } - break; - } - } - + case 0x07: case 0x0F: case 0x17: case 0x1F: { // Explicitly UNDEFINED, A6.3. + opcode << "UNDEFINED"; + break; + } + case 0x06: case 0x0E: { // "Store single data item" undefined opcodes, A6.3.10. + opcode << "UNDEFINED [store]"; + break; + } + case 0x15: case 0x1D: { // "Load word" undefined opcodes, A6.3.7. 
+ opcode << "UNDEFINED [load]"; break; } - case 0x03: case 0x0B: case 0x11: case 0x13: case 0x19: case 0x1B: { // 00xx011 - // Load byte/halfword - // |111|11|10|0 0|00|0|0000|1111|110000|000000| - // |5 3|21|09|8 7|65|4|3 0|5 2|10 6|5 0| - // |---|--|--|---|--|-|----|----|------|------| - // |332|22|22|2 2|22|2|1111|1111|110000|000000| - // |1 9|87|65|4 3|21|0|9 6|5 2|10 6|5 0| - // |---|--|--|---|--|-|----|----|------|------| - // |111|11|00|op3|01|1| Rn | Rt | op4 | | - // |111|11| op2 | | | imm12 | - uint32_t op3 = (instr >> 23) & 3; + case 0x10: case 0x12: case 0x14: case 0x16: case 0x18: case 0x1A: case 0x1C: case 0x1E: { + opcode << "UNKNOWN " << op2 << " [SIMD]"; + break; + } + case 0x01: case 0x00: case 0x09: case 0x08: // {LD,ST}RB{,T} + case 0x03: case 0x02: case 0x0B: case 0x0A: // {LD,ST}RH{,T} + case 0x05: case 0x04: case 0x0D: case 0x0C: // {LD,ST}R{,T} + case 0x11: case 0x19: // LDRSB{,T} (no signed store) + case 0x13: case 0x1B: { // LDRSH{,T} (no signed store) + // Load: + // (Store is the same except that l==0 and always s==0 below.) + // 00s.whl (sign, word, half, load) + // LDR{S}B imm12: 11111|00s1001| Rn | Rt |imm12 (0x09) + // LDR{S}B imm8: 11111|00s0001| Rn | Rt |1PUW|imm8 (0x01) + // LDR{S}BT imm8: 11111|00s0001| Rn | Rt |1110|imm8 (0x01) + // LDR{S}B lit: 11111|00sU001|1111| Rt |imm12 (0x01/0x09) + // LDR{S}B reg: 11111|00s0001| Rn | Rt |000000|imm2| Rm (0x01) + // LDR{S}H imm12: 11111|00s1011| Rn | Rt |imm12 (0x0B) + // LDR{S}H imm8: 11111|00s0011| Rn | Rt |1PUW|imm8 (0x03) + // LDR{S}HT imm8: 11111|00s0011| Rn | Rt |1110|imm8 (0x03) + // LDR{S}H lit: 11111|00sU011|1111| Rt |imm12 (0x03/0x0B) + // LDR{S}H reg: 11111|00s0011| Rn | Rt |000000|imm2| Rm (0x03) + // LDR imm12: 11111|0001101| Rn | Rt |imm12 (0x0D) + // LDR imm8: 11111|0000101| Rn | Rt |1PUW|imm8 (0x05) + // LDRT imm8: 11111|0000101| Rn | Rt |1110|imm8 (0x05) + // LDR lit: 11111|000U101|1111| Rt |imm12 (0x05/0x0D) + // LDR reg: 11111|0000101| Rn | Rt |000000|imm2| Rm (0x05) + // + // If Rt == 15, instead of load we have preload: + // PLD{W} imm12: 11111|00010W1| Rn |1111|imm12 (0x09/0x0B) + // PLD{W} imm8: 11111|00000W1| Rn |1111|1100|imm8 (0x01/0x03); -imm8 + // PLD lit: 11111|000U001|1111|1111|imm12 (0x01/0x09) + // PLD{W} reg: 11111|00000W1| Rn |1111|000000|imm2| Rm (0x01/0x03) + // PLI imm12: 11111|0011001| Rn |1111|imm12 (0x19) + // PLI imm8: 11111|0010001| Rn |1111|1100|imm8 (0x11); -imm8 + // PLI lit: 11111|001U001|1111|1111|imm12 (0x01/0x09) + // PLI reg: 11111|0010001| Rn |1111|000000|imm2| Rm (0x01/0x03) + + bool is_load = HasBitSet(instr, 20); + bool is_half = HasBitSet(instr, 21); // W for PLD/PLDW. + bool is_word = HasBitSet(instr, 22); + bool is_signed = HasBitSet(instr, 24); ArmRegister Rn(instr, 16); ArmRegister Rt(instr, 12); - if (Rt.r != 15) { - if (op3 == 1) { - // LDRH.W Rt, [Rn, #imm12] - 111 11 00 01 011 nnnn tttt iiiiiiiiiiii - uint32_t imm12 = instr & 0xFFF; - opcode << "ldrh.w"; - args << Rt << ", [" << Rn << ", #" << imm12 << "]"; - if (Rn.r == 9) { - args << " ; "; - Thread::DumpThreadOffset<4>(args, imm12); - } else if (Rn.r == 15) { - intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr); - lit_adr = RoundDown(lit_adr, 4) + 4 + imm12; - args << StringPrintf(" ; 0x%08x", *reinterpret_cast<int32_t*>(lit_adr)); - } - } else if (op3 == 3) { - // LDRSH.W Rt, [Rn, #imm12] - 111 11 00 11 011 nnnn tttt iiiiiiiiiiii - // LDRSB.W Rt, [Rn, #imm12] - 111 11 00 11 001 nnnn tttt iiiiiiiiiiii - uint32_t imm12 = instr & 0xFFF; - opcode << (HasBitSet(instr, 20) ? 
"ldrsb.w" : "ldrsh.w"); - args << Rt << ", [" << Rn << ", #" << imm12 << "]"; - if (Rn.r == 9) { - args << " ; "; - Thread::DumpThreadOffset<4>(args, imm12); - } else if (Rn.r == 15) { - intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr); - lit_adr = RoundDown(lit_adr, 4) + 4 + imm12; - args << StringPrintf(" ; 0x%08x", *reinterpret_cast<int32_t*>(lit_adr)); + uint32_t imm12 = instr & 0xFFF; + uint32_t U = (instr >> 23) & 1; // U for imm12 + uint32_t imm8 = instr & 0xFF; + uint32_t op4 = (instr >> 8) & 0xF; // 1PUW for imm8 + if (Rt.r == PC && is_load && !is_word) { + // PLD, PLDW, PLI + const char* pld_pli = (is_signed ? "pli" : "pld"); + const char* w = (is_half ? "w" : ""); + if (is_signed && !is_half) { + opcode << "UNDEFINED [PLI+W]"; + } else if (Rn.r == PC || U != 0u) { + opcode << pld_pli << w; + args << "[" << Rn << ", #" << (U != 0u ? "" : "-") << imm12 << "]"; + if (Rn.r == PC && is_half) { + args << " (UNPREDICTABLE)"; } + } else if ((instr & 0xFC0) == 0) { + opcode << pld_pli << w; + RmLslImm2 Rm(instr); + args << "[" << Rn << ", " << Rm << "]"; + } else if (op4 == 0xC) { + opcode << pld_pli << w; + args << "[" << Rn << ", #-" << imm8 << "]"; + } else { + opcode << "UNDEFINED [~" << pld_pli << "]"; } + break; + } + const char* ldr_str = is_load ? "ldr" : "str"; + const char* sign = is_signed ? "s" : ""; + const char* type = is_word ? "" : is_half ? "h" : "b"; + bool unpred = (Rt.r == SP && !is_word) || (Rt.r == PC && !is_load); + if (Rn.r == PC && !is_load) { + opcode << "UNDEFINED [STR-lit]"; + unpred = false; + } else if (Rn.r == PC || U != 0u) { + // Load/store with imm12 (load literal if Rn.r == PC; there's no store literal). + opcode << ldr_str << sign << type << ".w"; + args << Rt << ", [" << Rn << ", #" << (U != 0u ? "" : "-") << imm12 << "]"; + if (Rn.r == TR && is_load) { + args << " ; "; + Thread::DumpThreadOffset<4>(args, imm12); + } else if (Rn.r == PC) { + T2LitType lit_type[] = { + kT2LitUByte, kT2LitUHalf, kT2LitHexWord, kT2LitInvalid, + kT2LitUByte, kT2LitUHalf, kT2LitHexWord, kT2LitInvalid, + kT2LitSByte, kT2LitSHalf, kT2LitInvalid, kT2LitInvalid, + kT2LitSByte, kT2LitSHalf, kT2LitInvalid, kT2LitInvalid, + }; + DCHECK_LT(op2 >> 1, arraysize(lit_type)); + DCHECK_NE(lit_type[op2 >> 1], kT2LitInvalid); + DumpThumb2Literal(args, instr_ptr, U, imm12, lit_type[op2 >> 1]); + } + } else if ((instr & 0xFC0) == 0) { + opcode << ldr_str << sign << type << ".w"; + RmLslImm2 Rm(instr); + args << Rt << ", [" << Rn << ", " << Rm << "]"; + unpred = unpred || (Rm.rm.r == SP) || (Rm.rm.r == PC); + } else if (is_word && Rn.r == SP && imm8 == 4 && op4 == (is_load ? 0xB : 0xD)) { + opcode << (is_load ? "pop" : "push") << ".w"; + args << Rn; + unpred = unpred || (Rn.r == SP); + } else if ((op4 & 5) == 0) { + opcode << "UNDEFINED [P = W = 0 for " << ldr_str << "]"; + unpred = false; + } else { + uint32_t P = (instr >> 10) & 1; + U = (instr >> 9) & 1; + uint32_t W = (instr >> 8) & 1; + bool pre_index = (P != 0 && W == 1); + bool post_index = (P == 0 && W == 1); + const char* t = (P != 0 && U != 0 && W == 0) ? "t" : ""; // Unprivileged load/store? + opcode << ldr_str << sign << type << t << ".w"; + args << Rt << ", [" << Rn << (post_index ? "]" : "") << ", #" << (U != 0 ? "" : "-") + << imm8 << (post_index ? "" : "]") << (pre_index ? "!" 
: ""); + unpred = (W != 0 && Rn.r == Rt.r); + } + if (unpred) { + args << " (UNPREDICTABLE)"; } break; } @@ -1413,75 +1455,6 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) } // else unknown instruction break; } - case 0x05: case 0x0D: case 0x15: case 0x1D: { // 00xx101 - // Load word - // |111|11|10|0 0|00|0|0000|1111|110000|000000| - // |5 3|21|09|8 7|65|4|3 0|5 2|10 6|5 0| - // |---|--|--|---|--|-|----|----|------|------| - // |332|22|22|2 2|22|2|1111|1111|110000|000000| - // |1 9|87|65|4 3|21|0|9 6|5 2|10 6|5 0| - // |---|--|--|---|--|-|----|----|------|------| - // |111|11|00|op3|10|1| Rn | Rt | op4 | | - // |111|11| op2 | | | imm12 | - uint32_t op3 = (instr >> 23) & 3; - uint32_t op4 = (instr >> 6) & 0x3F; - ArmRegister Rn(instr, 16); - ArmRegister Rt(instr, 12); - if (op3 == 1 || Rn.r == 15) { - // LDR.W Rt, [Rn, #imm12] - 111 11 00 00 101 nnnn tttt iiiiiiiiiiii - // LDR.W Rt, [PC, #imm12] - 111 11 00 0x 101 1111 tttt iiiiiiiiiiii - uint32_t imm12 = instr & 0xFFF; - opcode << "ldr.w"; - args << Rt << ", [" << Rn << ", #" << imm12 << "]"; - if (Rn.r == 9) { - args << " ; "; - Thread::DumpThreadOffset<4>(args, imm12); - } else if (Rn.r == 15) { - intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr); - lit_adr = RoundDown(lit_adr, 4) + 4 + imm12; - args << StringPrintf(" ; 0x%08x", *reinterpret_cast<int32_t*>(lit_adr)); - } - } else if (op4 == 0) { - // LDR.W Rt, [Rn, Rm{, LSL #imm2}] - 111 11 00 00 101 nnnn tttt 000000iimmmm - uint32_t imm2 = (instr >> 4) & 0xF; - ArmRegister rm(instr, 0); - opcode << "ldr.w"; - args << Rt << ", [" << Rn << ", " << rm; - if (imm2 != 0) { - args << ", lsl #" << imm2; - } - args << "]"; - } else { - bool p = (instr & (1 << 10)) != 0; - bool w = (instr & (1 << 8)) != 0; - bool u = (instr & (1 << 9)) != 0; - if (p && u && !w) { - // LDRT Rt, [Rn, #imm8] - 111 11 00 00 101 nnnn tttt 1110iiiiiiii - uint32_t imm8 = instr & 0xFF; - opcode << "ldrt"; - args << Rt << ", [" << Rn << ", #" << imm8 << "]"; - } else if (Rn.r == 13 && !p && u && w && (instr & 0xff) == 4) { - // POP - opcode << "pop"; - args << "{" << Rt << "}"; - } else { - bool wback = !p || w; - uint32_t offset = (instr & 0xff); - opcode << "ldr.w"; - args << Rt << ","; - if (p && !wback) { - args << "[" << Rn << ", #" << offset << "]"; - } else if (p && wback) { - args << "[" << Rn << ", #" << offset << "]!"; - } else if (!p && wback) { - args << "[" << Rn << "], #" << offset; - } else { - LOG(FATAL) << p << " " << w; - } - } - } - break; - } default: // more formats if ((op2 >> 4) == 2) { // 010xxxx // data processing (register) @@ -1808,6 +1781,23 @@ size_t DisassemblerArm::DumpThumb16(std::ostream& os, const uint8_t* instr_ptr) DumpBranchTarget(args, instr_ptr + 4, imm32); break; } + case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x26: case 0x27: + case 0x28: case 0x29: case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E: case 0x2F: { + opcode << "push"; + args << RegisterList((instr & 0xFF) | ((instr & 0x100) << 6)); + break; + } + case 0x60: case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: + case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: { + opcode << "pop"; + args << RegisterList((instr & 0xFF) | ((instr & 0x100) << 7)); + break; + } + case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: { + opcode << "bkpt"; + args << "#" << (instr & 0xFF); + break; + } case 0x50: case 0x51: // 101000x case 0x52: case 0x53: // 101001x case 
0x56: case 0x57: { // 101011x diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index b989c7fbf7..de4ea36bbb 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -2177,11 +2177,18 @@ struct OatdumpMain : public CmdlineMain<OatdumpArgs> { virtual bool ExecuteWithoutRuntime() OVERRIDE { CHECK(args_ != nullptr); - CHECK(args_->symbolize_); + CHECK(args_->oat_filename_ != nullptr); MemMap::Init(); - return SymbolizeOat(args_->oat_filename_, args_->output_name_) == EXIT_SUCCESS; + if (args_->symbolize_) { + return SymbolizeOat(args_->oat_filename_, args_->output_name_) == EXIT_SUCCESS; + } else { + return DumpOat(nullptr, + args_->oat_filename_, + oat_dumper_options_.release(), + args_->os_) == EXIT_SUCCESS; + } } virtual bool ExecuteWithRuntime(Runtime* runtime) { diff --git a/runtime/Android.mk b/runtime/Android.mk index ca29eba4ee..13a216c48b 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -67,6 +67,7 @@ LIBART_COMMON_SRC_FILES := \ gc/space/rosalloc_space.cc \ gc/space/space.cc \ gc/space/zygote_space.cc \ + gc/task_processor.cc \ hprof/hprof.cc \ image.cc \ indirect_reference_table.cc \ diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 66ea3ce8e5..fec1ce59bf 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -233,6 +233,11 @@ END \c_name DELIVER_PENDING_EXCEPTION .endm +.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + RETURN_IF_RESULT_IS_NON_ZERO + DELIVER_PENDING_EXCEPTION +.endm + // Macros taking opportunity of code similarities for downcalls with referrer for non-wide fields. .macro ONE_ARG_REF_DOWNCALL name, entrypoint, return .extern \entrypoint @@ -258,7 +263,7 @@ ENTRY \name END \name .endm -.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return +.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return .extern \entrypoint ENTRY \name SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3, r12 @ save callee saves in case of GC @@ -633,50 +638,35 @@ ENTRY art_quick_aput_obj bkpt @ unreached END art_quick_aput_obj - /* - * Entry from managed code when uninitialized static storage, this stub will run the class - * initializer and deliver the exception on error. On success the static storage base is - * returned. - */ - .extern artInitializeStaticStorageFromCode -ENTRY art_quick_initialize_static_storage - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3 @ save callee saves in case of GC - mov r2, r9 @ pass Thread::Current - @ artInitializeStaticStorageFromCode(uint32_t type_idx, Method* referrer, Thread*) - bl artInitializeStaticStorageFromCode +// Macro to facilitate adding new allocation entrypoints. +.macro TWO_ARG_DOWNCALL name, entrypoint, return + .extern \entrypoint +ENTRY \name + SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3 @ save callee saves in case of GC + mov r2, r9 @ pass Thread::Current + bl \entrypoint @ (uint32_t type_idx, Method* method, Thread*) RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME - RETURN_IF_RESULT_IS_NON_ZERO - DELIVER_PENDING_EXCEPTION -END art_quick_initialize_static_storage + \return +END \name +.endm - /* - * Entry from managed code when dex cache misses for a type_idx - */ - .extern artInitializeTypeFromCode -ENTRY art_quick_initialize_type - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3 @ save callee saves in case of GC - mov r2, r9 @ pass Thread::Current - @ artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*) - bl artInitializeTypeFromCode +// Macro to facilitate adding new array allocation entrypoints. 
+.macro THREE_ARG_DOWNCALL name, entrypoint, return + .extern \entrypoint +ENTRY \name + SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3, r12 @ save callee saves in case of GC + mov r3, r9 @ pass Thread::Current + @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*) + bl \entrypoint RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME - RETURN_IF_RESULT_IS_NON_ZERO - DELIVER_PENDING_EXCEPTION -END art_quick_initialize_type + \return +END \name +.endm - /* - * Entry from managed code when type_idx needs to be checked for access and dex cache may also - * miss. - */ - .extern artInitializeTypeAndVerifyAccessFromCode -ENTRY art_quick_initialize_type_and_verify_access - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3 @ save callee saves in case of GC - mov r2, r9 @ pass Thread::Current - @ artInitializeTypeAndVerifyAccessFromCode(uint32_t type_idx, Method* referrer, Thread*) - bl artInitializeTypeAndVerifyAccessFromCode - RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME - RETURN_IF_RESULT_IS_NON_ZERO - DELIVER_PENDING_EXCEPTION -END art_quick_initialize_type_and_verify_access +TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + +TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER /* * Called by managed code to resolve a static field and load a non-wide value. @@ -789,43 +779,7 @@ END art_quick_set64_instance * R1 holds the string index. The fast path check for hit in strings cache has already been * performed. */ - .extern artResolveStringFromCode -ENTRY art_quick_resolve_string - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3 @ save callee saves in case of GC - mov r2, r9 @ pass Thread::Current - @ artResolveStringFromCode(Method* referrer, uint32_t string_idx, Thread*) - bl artResolveStringFromCode - RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME - RETURN_IF_RESULT_IS_NON_ZERO - DELIVER_PENDING_EXCEPTION -END art_quick_resolve_string - -// Macro to facilitate adding new allocation entrypoints. -.macro TWO_ARG_DOWNCALL name, entrypoint, return - .extern \entrypoint -ENTRY \name - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3 @ save callee saves in case of GC - mov r2, r9 @ pass Thread::Current - bl \entrypoint @ (uint32_t type_idx, Method* method, Thread*) - RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME - \return - DELIVER_PENDING_EXCEPTION -END \name -.endm - -// Macro to facilitate adding new array allocation entrypoints. -.macro THREE_ARG_DOWNCALL name, entrypoint, return - .extern \entrypoint -ENTRY \name - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3, r12 @ save callee saves in case of GC - mov r3, r9 @ pass Thread::Current - @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*) - bl \entrypoint - RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME - \return - DELIVER_PENDING_EXCEPTION -END \name -.endm +TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Generate the allocation entrypoints for each allocator. 
GENERATE_ALL_ALLOC_ENTRYPOINTS diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 6047bb063f..770073b5ba 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1229,7 +1229,6 @@ ENTRY \name bl \entrypoint // (uint32_t type_idx, Method* method, Thread*) RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME \return - DELIVER_PENDING_EXCEPTION END \name .endm @@ -1242,7 +1241,6 @@ ENTRY \name bl \entrypoint RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME \return - DELIVER_PENDING_EXCEPTION END \name .endm @@ -1283,6 +1281,13 @@ ENTRY \name END \name .endm +.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + cbz w0, 1f // result zero branch over + ret // return +1: + DELIVER_PENDING_EXCEPTION +.endm + /* * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on * failure. @@ -1294,10 +1299,10 @@ TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, * initializer and deliver the exception on error. On success the static storage base is * returned. */ -TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO +TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO -TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO +TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1 ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1 @@ -1345,7 +1350,7 @@ END art_quick_set64_static * w1 holds the string index. The fast path check for hit in strings cache has already been * performed. */ -TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO +TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Generate the allocation entrypoints for each allocator. GENERATE_ALL_ALLOC_ENTRYPOINTS diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index a0fc6d2f9c..509f9910a2 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -299,7 +299,7 @@ DELIVER_PENDING_EXCEPTION .endm -.macro RETURN_IF_RESULT_IS_NON_ZERO +.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME beqz $v0, 1f # success? nop @@ -696,44 +696,6 @@ ENTRY art_quick_aput_obj END art_quick_aput_obj /* - * Entry from managed code when uninitialized static storage, this stub will run the class - * initializer and deliver the exception on error. On success the static storage base is - * returned. 
- */ - .extern artInitializeStaticStorageFromCode -ENTRY art_quick_initialize_static_storage - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC - # artInitializeStaticStorageFromCode(uint32_t type_idx, Method* referrer, Thread*) - jal artInitializeStaticStorageFromCode - move $a2, rSELF # pass Thread::Current - RETURN_IF_RESULT_IS_NON_ZERO -END art_quick_initialize_static_storage - - /* - * Entry from managed code when dex cache misses for a type_idx. - */ - .extern artInitializeTypeFromCode -ENTRY art_quick_initialize_type - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC - # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*) - jal artInitializeTypeFromCode - move $a2, rSELF # pass Thread::Current - RETURN_IF_RESULT_IS_NON_ZERO -END art_quick_initialize_type - - /* - * Entry from managed code when type_idx needs to be checked for access and dex cache may also - * miss. - */ - .extern artInitializeTypeAndVerifyAccessFromCode -ENTRY art_quick_initialize_type_and_verify_access - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC - # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*) - jal artInitializeTypeAndVerifyAccessFromCode - move $a2, rSELF # pass Thread::Current - RETURN_IF_RESULT_IS_NON_ZERO -END art_quick_initialize_type_and_verify_access - /* * Called by managed code to resolve a static field and load a boolean primitive value. */ .extern artGetBooleanStaticFromCode @@ -1018,22 +980,6 @@ ENTRY art_quick_set_obj_instance RETURN_IF_ZERO END art_quick_set_obj_instance - /* - * Entry from managed code to resolve a string, this stub will allocate a String and deliver an - * exception on error. On success the String is returned. R0 holds the referring method, - * R1 holds the string index. The fast path check for hit in strings cache has already been - * performed. - */ - .extern artResolveStringFromCode -ENTRY art_quick_resolve_string - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC - # artResolveStringFromCode(Method* referrer, uint32_t string_idx, Thread*) - jal artResolveStringFromCode - move $a2, rSELF # pass Thread::Current - RETURN_IF_RESULT_IS_NON_ZERO -END art_quick_resolve_string - - // Macro to facilitate adding new allocation entrypoints. .macro TWO_ARG_DOWNCALL name, entrypoint, return .extern \entrypoint @@ -1059,6 +1005,32 @@ END \name GENERATE_ALL_ALLOC_ENTRYPOINTS /* + * Entry from managed code to resolve a string, this stub will allocate a String and deliver an + * exception on error. On success the String is returned. R0 holds the referring method, + * R1 holds the string index. The fast path check for hit in strings cache has already been + * performed. + */ +TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + + /* + * Entry from managed code when uninitialized static storage, this stub will run the class + * initializer and deliver the exception on error. On success the static storage base is + * returned. + */ +TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + + /* + * Entry from managed code when dex cache misses for a type_idx. + */ +TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + + /* + * Entry from managed code when type_idx needs to be checked for access and dex cache may also + * miss. 
+ */ +TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + + /* * Called by managed code when the value in rSUSPEND has been decremented to 0. */ .extern artTestSuspendFromCode diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S index 632c5f372a..53b9f4e966 100644 --- a/runtime/arch/quick_alloc_entrypoints.S +++ b/runtime/arch/quick_alloc_entrypoints.S @@ -16,25 +16,25 @@ .macro GENERATE_ALLOC_ENTRYPOINTS c_suffix, cxx_suffix // Called by managed code to allocate an object. -TWO_ARG_DOWNCALL art_quick_alloc_object\c_suffix, artAllocObjectFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO +TWO_ARG_DOWNCALL art_quick_alloc_object\c_suffix, artAllocObjectFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an object of a resolved class. -TWO_ARG_DOWNCALL art_quick_alloc_object_resolved\c_suffix, artAllocObjectFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO +TWO_ARG_DOWNCALL art_quick_alloc_object_resolved\c_suffix, artAllocObjectFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an object of an initialized class. -TWO_ARG_DOWNCALL art_quick_alloc_object_initialized\c_suffix, artAllocObjectFromCodeInitialized\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO +TWO_ARG_DOWNCALL art_quick_alloc_object_initialized\c_suffix, artAllocObjectFromCodeInitialized\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an object when the caller doesn't know whether it has access // to the created type. -TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check\c_suffix, artAllocObjectFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO +TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check\c_suffix, artAllocObjectFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an array. -THREE_ARG_DOWNCALL art_quick_alloc_array\c_suffix, artAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO +THREE_ARG_DOWNCALL art_quick_alloc_array\c_suffix, artAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an array of a resolve class. -THREE_ARG_DOWNCALL art_quick_alloc_array_resolved\c_suffix, artAllocArrayFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO +THREE_ARG_DOWNCALL art_quick_alloc_array_resolved\c_suffix, artAllocArrayFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an array when the caller doesn't know whether it has access // to the created type. -THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check\c_suffix, artAllocArrayFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO +THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check\c_suffix, artAllocArrayFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY. -THREE_ARG_DOWNCALL art_quick_check_and_alloc_array\c_suffix, artCheckAndAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO +THREE_ARG_DOWNCALL art_quick_check_and_alloc_array\c_suffix, artCheckAndAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY. 
-THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check\c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO +THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check\c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER .endm .macro GENERATE_ALL_ALLOC_ENTRYPOINTS diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index 0fcd297497..285007c48d 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -1139,8 +1139,8 @@ TEST_F(StubTest, AllocObjectArray) { if ((false)) { // Use an arbitrary method from c to use as referrer size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()), // type_idx - reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0)), // arbitrary 10U, + reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0)), // arbitrary StubTest::GetEntrypoint(self, kQuickAllocArray), self); @@ -1155,7 +1155,8 @@ TEST_F(StubTest, AllocObjectArray) { { // We can use nullptr in the second argument as we do not need a method here (not used in // resolved/initialized cases) - size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), reinterpret_cast<size_t>(nullptr), 10U, + size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), 10U, + reinterpret_cast<size_t>(nullptr), StubTest::GetEntrypoint(self, kQuickAllocArrayResolved), self); EXPECT_FALSE(self->IsExceptionPending()) << PrettyTypeOf(self->GetException(nullptr)); @@ -1173,8 +1174,9 @@ TEST_F(StubTest, AllocObjectArray) { // Out-of-memory. { - size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), reinterpret_cast<size_t>(nullptr), + size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), GB, // that should fail... + reinterpret_cast<size_t>(nullptr), StubTest::GetEntrypoint(self, kQuickAllocArrayResolved), self); diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc index b781d6008c..f3e0918d33 100644 --- a/runtime/base/logging.cc +++ b/runtime/base/logging.cc @@ -16,6 +16,8 @@ #include "logging.h" +#include <iostream> +#include <limits> #include <sstream> #include "base/mutex.h" @@ -42,6 +44,19 @@ static std::unique_ptr<std::string> gCmdLine; static std::unique_ptr<std::string> gProgramInvocationName; static std::unique_ptr<std::string> gProgramInvocationShortName; +// Print INTERNAL_FATAL messages directly instead of at destruction time. This only works on the +// host right now: for the device, a stream buf collating output into lines and calling LogLine or +// lower-level logging is necessary. +#ifdef HAVE_ANDROID_OS +static constexpr bool kPrintInternalFatalDirectly = false; +#else +static constexpr bool kPrintInternalFatalDirectly = !kIsTargetBuild; +#endif + +static bool PrintDirectly(LogSeverity severity) { + return kPrintInternalFatalDirectly && severity == INTERNAL_FATAL; +} + const char* GetCmdLine() { return (gCmdLine.get() != nullptr) ? 
gCmdLine->c_str() : nullptr; } @@ -169,31 +184,39 @@ class LogMessageData { LogMessage::LogMessage(const char* file, unsigned int line, LogSeverity severity, int error) : data_(new LogMessageData(file, line, severity, error)) { + if (PrintDirectly(severity)) { + static const char* log_characters = "VDIWEFF"; + CHECK_EQ(strlen(log_characters), INTERNAL_FATAL + 1U); + stream() << ProgramInvocationShortName() << " " << log_characters[static_cast<size_t>(severity)] + << " " << getpid() << " " << ::art::GetTid() << " " << file << ":" << line << "]"; + } } LogMessage::~LogMessage() { - if (data_->GetSeverity() < gMinimumLogSeverity) { - return; // No need to format something we're not going to output. - } + if (!PrintDirectly(data_->GetSeverity())) { + if (data_->GetSeverity() < gMinimumLogSeverity) { + return; // No need to format something we're not going to output. + } - // Finish constructing the message. - if (data_->GetError() != -1) { - data_->GetBuffer() << ": " << strerror(data_->GetError()); - } - std::string msg(data_->ToString()); - - // Do the actual logging with the lock held. - { - MutexLock mu(Thread::Current(), *Locks::logging_lock_); - if (msg.find('\n') == std::string::npos) { - LogLine(data_->GetFile(), data_->GetLineNumber(), data_->GetSeverity(), msg.c_str()); - } else { - msg += '\n'; - size_t i = 0; - while (i < msg.size()) { - size_t nl = msg.find('\n', i); - msg[nl] = '\0'; - LogLine(data_->GetFile(), data_->GetLineNumber(), data_->GetSeverity(), &msg[i]); - i = nl + 1; + // Finish constructing the message. + if (data_->GetError() != -1) { + data_->GetBuffer() << ": " << strerror(data_->GetError()); + } + std::string msg(data_->ToString()); + + // Do the actual logging with the lock held. + { + MutexLock mu(Thread::Current(), *Locks::logging_lock_); + if (msg.find('\n') == std::string::npos) { + LogLine(data_->GetFile(), data_->GetLineNumber(), data_->GetSeverity(), msg.c_str()); + } else { + msg += '\n'; + size_t i = 0; + while (i < msg.size()) { + size_t nl = msg.find('\n', i); + msg[nl] = '\0'; + LogLine(data_->GetFile(), data_->GetLineNumber(), data_->GetSeverity(), &msg[i]); + i = nl + 1; + } } } } @@ -205,6 +228,9 @@ LogMessage::~LogMessage() { } std::ostream& LogMessage::stream() { + if (PrintDirectly(data_->GetSeverity())) { + return std::cerr; + } return data_->GetBuffer(); } @@ -239,8 +265,25 @@ void LogMessage::LogLine(const char* file, unsigned int line, LogSeverity log_se void LogMessage::LogLineLowStack(const char* file, unsigned int line, LogSeverity log_severity, const char* message) { #ifdef HAVE_ANDROID_OS - // TODO: be more conservative on stack usage here. - LogLine(file, line, log_severity, message); + // Use android_writeLog() to avoid stack-based buffers used by android_printLog(). + const char* tag = ProgramInvocationShortName(); + int priority = kLogSeverityToAndroidLogPriority[log_severity]; + char* buf = nullptr; + size_t buf_size = 0u; + if (priority == ANDROID_LOG_FATAL) { + // Allocate buffer for snprintf(buf, buf_size, "%s:%u] %s", file, line, message) below. + // If allocation fails, fall back to printing only the message. 
+ buf_size = strlen(file) + 1 /* ':' */ + std::numeric_limits<typeof(line)>::max_digits10 + + 2 /* "] " */ + strlen(message) + 1 /* terminating 0 */; + buf = reinterpret_cast<char*>(malloc(buf_size)); + } + if (buf != nullptr) { + snprintf(buf, buf_size, "%s:%u] %s", file, line, message); + android_writeLog(priority, tag, buf); + free(buf); + } else { + android_writeLog(priority, tag, message); + } #else static const char* log_characters = "VDIWEFF"; CHECK_EQ(strlen(log_characters), INTERNAL_FATAL + 1U); diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc index 780e37a4d0..f272d88807 100644 --- a/runtime/base/unix_file/fd_file.cc +++ b/runtime/base/unix_file/fd_file.cc @@ -256,4 +256,8 @@ int FdFile::FlushClose() { return (flush_result != 0) ? flush_result : close_result; } +void FdFile::MarkUnchecked() { + guard_state_ = GuardState::kNoCheck; +} + } // namespace unix_file diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h index 1b1fb4e298..d51fbd68a7 100644 --- a/runtime/base/unix_file/fd_file.h +++ b/runtime/base/unix_file/fd_file.h @@ -85,6 +85,9 @@ class FdFile : public RandomAccessFile { kNoCheck // Do not check for the current file instance. }; + // WARNING: Only use this when you know what you're doing! + void MarkUnchecked(); + protected: // If the guard state indicates checking (!=kNoCheck), go to the target state "target". Print the // given warning if the current state is or exceeds warn_threshold. diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc index 3d7bf53a69..98fe0798ae 100644 --- a/runtime/common_runtime_test.cc +++ b/runtime/common_runtime_test.cc @@ -84,15 +84,19 @@ int ScratchFile::GetFd() const { return file_->Fd(); } -void ScratchFile::Unlink() { - if (!OS::FileExists(filename_.c_str())) { - return; - } +void ScratchFile::Close() { if (file_.get() != nullptr) { if (file_->FlushCloseOrErase() != 0) { PLOG(WARNING) << "Error closing scratch file."; } } +} + +void ScratchFile::Unlink() { + if (!OS::FileExists(filename_.c_str())) { + return; + } + Close(); int unlink_result = unlink(filename_.c_str()); CHECK_EQ(0, unlink_result); } diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h index edc3e1e07a..8851185ce1 100644 --- a/runtime/common_runtime_test.h +++ b/runtime/common_runtime_test.h @@ -55,6 +55,7 @@ class ScratchFile { int GetFd() const; + void Close(); void Unlink(); private: diff --git a/runtime/debugger.cc b/runtime/debugger.cc index 556f2f8726..5f5d3f7460 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -3192,7 +3192,7 @@ static bool IsMethodPossiblyInlined(Thread* self, mirror::ArtMethod* m) Handle<mirror::ArtMethod> method(hs.NewHandle(m)); verifier::MethodVerifier verifier(self, dex_cache->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(), code_item, m->GetDexMethodIndex(), method, - m->GetAccessFlags(), false, true, false); + m->GetAccessFlags(), false, true, false, true); // Note: we don't need to verify the method. 
return InlineMethodAnalyser::AnalyseMethodCode(&verifier, nullptr); } diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h index 67265a28ff..35579d66d4 100644 --- a/runtime/entrypoints/entrypoint_utils-inl.h +++ b/runtime/entrypoints/entrypoint_utils-inl.h @@ -172,8 +172,8 @@ inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Class* klass, template <bool kAccessCheck> ALWAYS_INLINE inline mirror::Class* CheckArrayAlloc(uint32_t type_idx, - mirror::ArtMethod* method, int32_t component_count, + mirror::ArtMethod* method, bool* slow_path) { if (UNLIKELY(component_count < 0)) { ThrowNegativeArraySizeException(component_count); @@ -208,12 +208,12 @@ inline mirror::Class* CheckArrayAlloc(uint32_t type_idx, template <bool kAccessCheck, bool kInstrumented> ALWAYS_INLINE inline mirror::Array* AllocArrayFromCode(uint32_t type_idx, - mirror::ArtMethod* method, int32_t component_count, + mirror::ArtMethod* method, Thread* self, gc::AllocatorType allocator_type) { bool slow_path = false; - mirror::Class* klass = CheckArrayAlloc<kAccessCheck>(type_idx, method, component_count, + mirror::Class* klass = CheckArrayAlloc<kAccessCheck>(type_idx, component_count, method, &slow_path); if (UNLIKELY(slow_path)) { if (klass == nullptr) { @@ -231,8 +231,8 @@ inline mirror::Array* AllocArrayFromCode(uint32_t type_idx, template <bool kAccessCheck, bool kInstrumented> ALWAYS_INLINE inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass, - mirror::ArtMethod* method, int32_t component_count, + mirror::ArtMethod* method, Thread* self, gc::AllocatorType allocator_type) { DCHECK(klass != nullptr); diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc index c329fe6920..db51264861 100644 --- a/runtime/entrypoints/entrypoint_utils.cc +++ b/runtime/entrypoints/entrypoint_utils.cc @@ -33,8 +33,8 @@ namespace art { static inline mirror::Class* CheckFilledNewArrayAlloc(uint32_t type_idx, - mirror::ArtMethod* referrer, int32_t component_count, + mirror::ArtMethod* referrer, Thread* self, bool access_check) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -76,11 +76,11 @@ static inline mirror::Class* CheckFilledNewArrayAlloc(uint32_t type_idx, } // Helper function to allocate array for FILLED_NEW_ARRAY. -mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* referrer, - int32_t component_count, Thread* self, +mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, int32_t component_count, + mirror::ArtMethod* referrer, Thread* self, bool access_check, gc::AllocatorType /* allocator_type */) { - mirror::Class* klass = CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self, + mirror::Class* klass = CheckFilledNewArrayAlloc(type_idx, component_count, referrer, self, access_check); if (UNLIKELY(klass == nullptr)) { return nullptr; @@ -96,12 +96,12 @@ mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* // Helper function to allocate array for FILLED_NEW_ARRAY. 
mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, - mirror::ArtMethod* referrer, int32_t component_count, + mirror::ArtMethod* referrer, Thread* self, bool access_check, gc::AllocatorType /* allocator_type */) { - mirror::Class* klass = CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self, + mirror::Class* klass = CheckFilledNewArrayAlloc(type_idx, component_count, referrer, self, access_check); if (UNLIKELY(klass == nullptr)) { return nullptr; @@ -183,14 +183,12 @@ void ThrowStackOverflowError(Thread* self) { env->SetObjectField(exc.get(), WellKnownClasses::java_lang_Throwable_stackTrace, stack_trace_elem.get()); - - // Throw the exception. - ThrowLocation throw_location = self->GetCurrentLocationForThrow(); - self->SetException(throw_location, - reinterpret_cast<mirror::Throwable*>(self->DecodeJObject(exc.get()))); } else { error_msg = "Could not create stack trace."; } + // Throw the exception. + self->SetException(self->GetCurrentLocationForThrow(), + reinterpret_cast<mirror::Throwable*>(self->DecodeJObject(exc.get()))); } else { // Could not allocate a string object. error_msg = "Couldn't throw new StackOverflowError because JNI NewStringUTF failed."; diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h index 0531122ed2..77eec46161 100644 --- a/runtime/entrypoints/entrypoint_utils.h +++ b/runtime/entrypoints/entrypoint_utils.h @@ -80,8 +80,8 @@ ALWAYS_INLINE inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Clas template <bool kAccessCheck> ALWAYS_INLINE inline mirror::Class* CheckArrayAlloc(uint32_t type_idx, - mirror::ArtMethod* method, int32_t component_count, + mirror::ArtMethod* method, bool* slow_path) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -91,29 +91,30 @@ ALWAYS_INLINE inline mirror::Class* CheckArrayAlloc(uint32_t type_idx, // check. 
template <bool kAccessCheck, bool kInstrumented> ALWAYS_INLINE inline mirror::Array* AllocArrayFromCode(uint32_t type_idx, - mirror::ArtMethod* method, int32_t component_count, + mirror::ArtMethod* method, Thread* self, gc::AllocatorType allocator_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); template <bool kAccessCheck, bool kInstrumented> ALWAYS_INLINE inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass, - mirror::ArtMethod* method, int32_t component_count, + mirror::ArtMethod* method, Thread* self, gc::AllocatorType allocator_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); -extern mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* method, - int32_t component_count, Thread* self, +extern mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, int32_t component_count, + mirror::ArtMethod* method, Thread* self, bool access_check, gc::AllocatorType allocator_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); extern mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, + int32_t component_count, mirror::ArtMethod* method, - int32_t component_count, Thread* self, + Thread* self, bool access_check, gc::AllocatorType allocator_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc index c0b79b2b6c..1fd1150e0d 100644 --- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc @@ -114,44 +114,44 @@ extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheck##suffix##suffix return AllocObjectFromCode<true, instrumented_bool>(type_idx, method, self, allocator_type); \ } \ extern "C" mirror::Array* artAllocArrayFromCode##suffix##suffix2( \ - uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self) \ + uint32_t type_idx, int32_t component_count, mirror::ArtMethod* method, Thread* self) \ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \ ScopedQuickEntrypointChecks sqec(self); \ - return AllocArrayFromCode<false, instrumented_bool>(type_idx, method, component_count, self, \ + return AllocArrayFromCode<false, instrumented_bool>(type_idx, component_count, method, self, \ allocator_type); \ } \ extern "C" mirror::Array* artAllocArrayFromCodeResolved##suffix##suffix2( \ - mirror::Class* klass, mirror::ArtMethod* method, int32_t component_count, Thread* self) \ + mirror::Class* klass, int32_t component_count, mirror::ArtMethod* method, Thread* self) \ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \ ScopedQuickEntrypointChecks sqec(self); \ - return AllocArrayFromCodeResolved<false, instrumented_bool>(klass, method, component_count, self, \ + return AllocArrayFromCodeResolved<false, instrumented_bool>(klass, component_count, method, self, \ allocator_type); \ } \ extern "C" mirror::Array* artAllocArrayFromCodeWithAccessCheck##suffix##suffix2( \ - uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self) \ + uint32_t type_idx, int32_t component_count, mirror::ArtMethod* method, Thread* self) \ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \ ScopedQuickEntrypointChecks sqec(self); \ - return AllocArrayFromCode<true, instrumented_bool>(type_idx, method, component_count, self, \ + return AllocArrayFromCode<true, instrumented_bool>(type_idx, component_count, method, self, \ allocator_type); \ } \ extern "C" mirror::Array* artCheckAndAllocArrayFromCode##suffix##suffix2( \ - uint32_t type_idx, 
mirror::ArtMethod* method, int32_t component_count, Thread* self) \ + uint32_t type_idx, int32_t component_count, mirror::ArtMethod* method, Thread* self) \ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \ ScopedQuickEntrypointChecks sqec(self); \ if (!instrumented_bool) { \ - return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, false, allocator_type); \ + return CheckAndAllocArrayFromCode(type_idx, component_count, method, self, false, allocator_type); \ } else { \ - return CheckAndAllocArrayFromCodeInstrumented(type_idx, method, component_count, self, false, allocator_type); \ + return CheckAndAllocArrayFromCodeInstrumented(type_idx, component_count, method, self, false, allocator_type); \ } \ } \ extern "C" mirror::Array* artCheckAndAllocArrayFromCodeWithAccessCheck##suffix##suffix2( \ - uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self) \ + uint32_t type_idx, int32_t component_count, mirror::ArtMethod* method, Thread* self) \ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \ ScopedQuickEntrypointChecks sqec(self); \ if (!instrumented_bool) { \ - return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, true, allocator_type); \ + return CheckAndAllocArrayFromCode(type_idx, component_count, method, self, true, allocator_type); \ } else { \ - return CheckAndAllocArrayFromCodeInstrumented(type_idx, method, component_count, self, true, allocator_type); \ + return CheckAndAllocArrayFromCodeInstrumented(type_idx, component_count, method, self, true, allocator_type); \ } \ } @@ -165,24 +165,24 @@ GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(BumpPointer, gc::kAllocatorTypeBumpPointer) GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(TLAB, gc::kAllocatorTypeTLAB) #define GENERATE_ENTRYPOINTS(suffix) \ -extern "C" void* art_quick_alloc_array##suffix(uint32_t, void*, int32_t); \ -extern "C" void* art_quick_alloc_array_resolved##suffix(void* klass, void*, int32_t); \ -extern "C" void* art_quick_alloc_array_with_access_check##suffix(uint32_t, void*, int32_t); \ -extern "C" void* art_quick_alloc_object##suffix(uint32_t type_idx, void* method); \ -extern "C" void* art_quick_alloc_object_resolved##suffix(void* klass, void* method); \ -extern "C" void* art_quick_alloc_object_initialized##suffix(void* klass, void* method); \ -extern "C" void* art_quick_alloc_object_with_access_check##suffix(uint32_t type_idx, void* method); \ -extern "C" void* art_quick_check_and_alloc_array##suffix(uint32_t, void*, int32_t); \ -extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix(uint32_t, void*, int32_t); \ -extern "C" void* art_quick_alloc_array##suffix##_instrumented(uint32_t, void*, int32_t); \ -extern "C" void* art_quick_alloc_array_resolved##suffix##_instrumented(void* klass, void*, int32_t); \ -extern "C" void* art_quick_alloc_array_with_access_check##suffix##_instrumented(uint32_t, void*, int32_t); \ -extern "C" void* art_quick_alloc_object##suffix##_instrumented(uint32_t type_idx, void* method); \ -extern "C" void* art_quick_alloc_object_resolved##suffix##_instrumented(void* klass, void* method); \ -extern "C" void* art_quick_alloc_object_initialized##suffix##_instrumented(void* klass, void* method); \ -extern "C" void* art_quick_alloc_object_with_access_check##suffix##_instrumented(uint32_t type_idx, void* method); \ -extern "C" void* art_quick_check_and_alloc_array##suffix##_instrumented(uint32_t, void*, int32_t); \ -extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented(uint32_t, void*, int32_t); \ +extern 
"C" void* art_quick_alloc_array##suffix(uint32_t, int32_t, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_alloc_array_resolved##suffix(mirror::Class* klass, int32_t, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_alloc_array_with_access_check##suffix(uint32_t, int32_t, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_alloc_object##suffix(uint32_t type_idx, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_alloc_object_resolved##suffix(mirror::Class* klass, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_alloc_object_initialized##suffix(mirror::Class* klass, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_alloc_object_with_access_check##suffix(uint32_t type_idx, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_check_and_alloc_array##suffix(uint32_t, int32_t, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix(uint32_t, int32_t, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_alloc_array##suffix##_instrumented(uint32_t, int32_t, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_alloc_array_resolved##suffix##_instrumented(mirror::Class* klass, int32_t, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_alloc_array_with_access_check##suffix##_instrumented(uint32_t, int32_t, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_alloc_object##suffix##_instrumented(uint32_t type_idx, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_alloc_object_resolved##suffix##_instrumented(mirror::Class* klass, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_alloc_object_initialized##suffix##_instrumented(mirror::Class* klass, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_alloc_object_with_access_check##suffix##_instrumented(uint32_t type_idx, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_check_and_alloc_array##suffix##_instrumented(uint32_t, int32_t, mirror::ArtMethod* ref); \ +extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented(uint32_t, int32_t, mirror::ArtMethod* ref); \ void SetQuickAllocEntryPoints##suffix(QuickEntryPoints* qpoints, bool instrumented) { \ if (instrumented) { \ qpoints->pAllocArray = art_quick_alloc_array##suffix##_instrumented; \ diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h index 7d77721f7b..b7e8d50790 100644 --- a/runtime/entrypoints/quick/quick_default_externs.h +++ b/runtime/entrypoints/quick/quick_default_externs.h @@ -19,16 +19,25 @@ #include <cstdint> +namespace art { +namespace mirror { +class Array; +class ArtMethod; +class Class; +class Object; +} // namespace mirror +} // namespace art + // These are extern declarations of assembly stubs with common names. // Cast entrypoints. -extern "C" void art_quick_check_cast(void*, void*); +extern "C" void art_quick_check_cast(const art::mirror::Class*, const art::mirror::Class*); // DexCache entrypoints. 
-extern "C" void* art_quick_initialize_static_storage(uint32_t, void*); -extern "C" void* art_quick_initialize_type(uint32_t, void*); -extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t, void*); -extern "C" void* art_quick_resolve_string(void*, uint32_t); +extern "C" void* art_quick_initialize_static_storage(uint32_t, art::mirror::ArtMethod*); +extern "C" void* art_quick_initialize_type(uint32_t, art::mirror::ArtMethod*); +extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t, art::mirror::ArtMethod*); +extern "C" void* art_quick_resolve_string(uint32_t, art::mirror::ArtMethod*); // Field entrypoints. extern "C" int art_quick_set8_instance(uint32_t, void*, int8_t); @@ -57,14 +66,16 @@ extern "C" void* art_quick_get_obj_instance(uint32_t, void*); extern "C" void* art_quick_get_obj_static(uint32_t); // Array entrypoints. -extern "C" void art_quick_aput_obj_with_null_and_bound_check(void*, uint32_t, void*); -extern "C" void art_quick_aput_obj_with_bound_check(void*, uint32_t, void*); -extern "C" void art_quick_aput_obj(void*, uint32_t, void*); +extern "C" void art_quick_aput_obj_with_null_and_bound_check(art::mirror::Array*, int32_t, + art::mirror::Object*); +extern "C" void art_quick_aput_obj_with_bound_check(art::mirror::Array*, int32_t, + art::mirror::Object*); +extern "C" void art_quick_aput_obj(art::mirror::Array*, int32_t, art::mirror::Object*); extern "C" void art_quick_handle_fill_data(void*, void*); // Lock entrypoints. -extern "C" void art_quick_lock_object(void*); -extern "C" void art_quick_unlock_object(void*); +extern "C" void art_quick_lock_object(art::mirror::Object*); +extern "C" void art_quick_unlock_object(art::mirror::Object*); // Math entrypoints. extern "C" int64_t art_quick_d2l(double); @@ -99,7 +110,7 @@ extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, extern "C" void art_quick_test_suspend(); // Throw entrypoints. 
-extern "C" void art_quick_deliver_exception(void*); +extern "C" void art_quick_deliver_exception(art::mirror::Object*); extern "C" void art_quick_throw_array_bounds(int32_t index, int32_t limit); extern "C" void art_quick_throw_div_zero(); extern "C" void art_quick_throw_no_such_method(int32_t method_idx); diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc index 2e7c8bab43..348495d354 100644 --- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc @@ -55,8 +55,8 @@ extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type return ResolveVerifyAndClinit(type_idx, referrer, self, false, true); } -extern "C" mirror::String* artResolveStringFromCode(mirror::ArtMethod* referrer, - int32_t string_idx, +extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, + mirror::ArtMethod* referrer, Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h index 8c108a816d..db8c0e3c58 100644 --- a/runtime/entrypoints/quick/quick_entrypoints.h +++ b/runtime/entrypoints/quick/quick_entrypoints.h @@ -28,6 +28,7 @@ namespace art { namespace mirror { +class Array; class ArtMethod; class Class; class Object; diff --git a/runtime/entrypoints/quick/quick_entrypoints_enum.h b/runtime/entrypoints/quick/quick_entrypoints_enum.h index 84158cd8b0..5a95491fbc 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_enum.h +++ b/runtime/entrypoints/quick/quick_entrypoints_enum.h @@ -18,6 +18,7 @@ #define ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_ENUM_H_ #include "quick_entrypoints.h" +#include "quick_entrypoints_enum.h" #include "thread.h" namespace art { @@ -47,10 +48,20 @@ static ThreadOffset<pointer_size> GetThreadOffset(QuickEntrypointEnum trampoline #undef ENTRYPOINT_ENUM }; LOG(FATAL) << "Unexpected trampoline " << static_cast<int>(trampoline); - return ThreadOffset<pointer_size>(-1); + UNREACHABLE(); } -} // namespace art +// Do a check functions to be able to test whether the right signature is used. +template <QuickEntrypointEnum entrypoint, typename... Types> +void CheckEntrypointTypes(); +#define ENTRYPOINT_ENUM(name, ...) \ +template <> inline void CheckEntrypointTypes<kQuick ## name, __VA_ARGS__>() {}; // NOLINT [readability/braces] [4] +#include "quick_entrypoints_list.h" + QUICK_ENTRYPOINT_LIST(ENTRYPOINT_ENUM) +#undef QUICK_ENTRYPOINT_LIST +#undef ENTRYPOINT_ENUM + +} // namespace art #endif // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_ENUM_H_ diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index fbc7913d06..da454f3110 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -20,23 +20,23 @@ // All quick entrypoints. Format is name, return type, argument types. 
#define QUICK_ENTRYPOINT_LIST(V) \ - V(AllocArray, void*, uint32_t, void*, int32_t) \ - V(AllocArrayResolved, void*, void*, void*, int32_t) \ - V(AllocArrayWithAccessCheck, void*, uint32_t, void*, int32_t) \ - V(AllocObject, void*, uint32_t, void*) \ - V(AllocObjectResolved, void*, void*, void*) \ - V(AllocObjectInitialized, void*, void*, void*) \ - V(AllocObjectWithAccessCheck, void*, uint32_t, void*) \ - V(CheckAndAllocArray, void*, uint32_t, void*, int32_t) \ - V(CheckAndAllocArrayWithAccessCheck, void*, uint32_t, void*, int32_t) \ + V(AllocArray, void*, uint32_t, int32_t, mirror::ArtMethod*) \ + V(AllocArrayResolved, void*, mirror::Class*, int32_t, mirror::ArtMethod*) \ + V(AllocArrayWithAccessCheck, void*, uint32_t, int32_t, mirror::ArtMethod*) \ + V(AllocObject, void*, uint32_t, mirror::ArtMethod*) \ + V(AllocObjectResolved, void*, mirror::Class*, mirror::ArtMethod*) \ + V(AllocObjectInitialized, void*, mirror::Class*, mirror::ArtMethod*) \ + V(AllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*) \ + V(CheckAndAllocArray, void*, uint32_t, int32_t, mirror::ArtMethod*) \ + V(CheckAndAllocArrayWithAccessCheck, void*, uint32_t, int32_t, mirror::ArtMethod*) \ \ V(InstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*) \ - V(CheckCast, void , void*, void*) \ + V(CheckCast, void, const mirror::Class*, const mirror::Class*) \ \ - V(InitializeStaticStorage, void*, uint32_t, void*) \ - V(InitializeTypeAndVerifyAccess, void*, uint32_t, void*) \ - V(InitializeType, void*, uint32_t, void*) \ - V(ResolveString, void*, void*, uint32_t) \ + V(InitializeStaticStorage, void*, uint32_t, mirror::ArtMethod*) \ + V(InitializeTypeAndVerifyAccess, void*, uint32_t, mirror::ArtMethod*) \ + V(InitializeType, void*, uint32_t, mirror::ArtMethod*) \ + V(ResolveString, void*, uint32_t, mirror::ArtMethod*) \ \ V(Set8Instance, int, uint32_t, void*, int8_t) \ V(Set8Static, int, uint32_t, int8_t) \ @@ -63,21 +63,21 @@ V(GetObjInstance, void*, uint32_t, void*) \ V(GetObjStatic, void*, uint32_t) \ \ - V(AputObjectWithNullAndBoundCheck, void, void*, uint32_t, void*) \ - V(AputObjectWithBoundCheck, void, void*, uint32_t, void*) \ - V(AputObject, void, void*, uint32_t, void*) \ + V(AputObjectWithNullAndBoundCheck, void, mirror::Array*, int32_t, mirror::Object*) \ + V(AputObjectWithBoundCheck, void, mirror::Array*, int32_t, mirror::Object*) \ + V(AputObject, void, mirror::Array*, int32_t, mirror::Object*) \ V(HandleFillArrayData, void, void*, void*) \ \ V(JniMethodStart, uint32_t, Thread*) \ - V(JniMethodStartSynchronized, uint32_t, jobject to_lock, Thread* self) \ - V(JniMethodEnd, void, uint32_t cookie, Thread* self) \ - V(JniMethodEndSynchronized, void, uint32_t cookie, jobject locked, Thread* self) \ - V(JniMethodEndWithReference, mirror::Object*, jobject result, uint32_t cookie, Thread* self) \ - V(JniMethodEndWithReferenceSynchronized, mirror::Object*, jobject result, uint32_t cookie, jobject locked, Thread* self) \ + V(JniMethodStartSynchronized, uint32_t, jobject, Thread*) \ + V(JniMethodEnd, void, uint32_t, Thread*) \ + V(JniMethodEndSynchronized, void, uint32_t, jobject, Thread*) \ + V(JniMethodEndWithReference, mirror::Object*, jobject, uint32_t, Thread*) \ + V(JniMethodEndWithReferenceSynchronized, mirror::Object*, jobject, uint32_t, jobject, Thread*) \ V(QuickGenericJniTrampoline, void, mirror::ArtMethod*) \ \ - V(LockObject, void, void*) \ - V(UnlockObject, void, void*) \ + V(LockObject, void, mirror::Object*) \ + V(UnlockObject, void, mirror::Object*) \ \ V(CmpgDouble, int32_t, 
double, double) \ V(CmpgFloat, int32_t, float, float) \ @@ -114,7 +114,7 @@ \ V(TestSuspend, void, void) \ \ - V(DeliverException, void, void*) \ + V(DeliverException, void, mirror::Object*) \ V(ThrowArrayBounds, void, int32_t, int32_t) \ V(ThrowDivZero, void, void) \ V(ThrowNoSuchMethod, void, int32_t) \ diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 8f09e074f7..2575676bc8 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -52,6 +52,7 @@ #include "gc/space/rosalloc_space-inl.h" #include "gc/space/space-inl.h" #include "gc/space/zygote_space.h" +#include "gc/task_processor.h" #include "entrypoints/quick/quick_alloc_entrypoints.h" #include "heap-inl.h" #include "image.h" @@ -129,10 +130,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max foreground_collector_type_(foreground_collector_type), background_collector_type_(background_collector_type), desired_collector_type_(foreground_collector_type_), - heap_trim_request_lock_(nullptr), - last_trim_time_(0), - heap_transition_or_trim_target_time_(0), - heap_trim_request_pending_(false), + pending_task_lock_(nullptr), parallel_gc_threads_(parallel_gc_threads), conc_gc_threads_(conc_gc_threads), low_memory_mode_(low_memory_mode), @@ -142,8 +140,6 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max zygote_creation_lock_("zygote creation lock", kZygoteCreationLock), zygote_space_(nullptr), large_object_threshold_(large_object_threshold), - gc_request_pending_(false), - conc_gc_running_(false), collector_type_running_(kCollectorTypeNone), last_gc_type_(collector::kGcTypeNone), next_gc_type_(collector::kGcTypePartial), @@ -194,6 +190,8 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max min_interval_homogeneous_space_compaction_by_oom_( min_interval_homogeneous_space_compaction_by_oom), last_time_homogeneous_space_compaction_by_oom_(NanoTime()), + pending_collector_transition_(nullptr), + pending_heap_trim_(nullptr), use_homogeneous_space_compaction_for_oom_(use_homogeneous_space_compaction_for_oom) { if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) { LOG(INFO) << "Heap() entering"; @@ -409,9 +407,8 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max gc_complete_lock_ = new Mutex("GC complete lock"); gc_complete_cond_.reset(new ConditionVariable("GC complete condition variable", *gc_complete_lock_)); - gc_request_lock_ = new Mutex("GC request lock"); - gc_request_cond_.reset(new ConditionVariable("GC request condition variable", *gc_request_lock_)); - heap_trim_request_lock_ = new Mutex("Heap trim request lock"); + task_processor_.reset(new TaskProcessor()); + pending_task_lock_ = new Mutex("Pending task lock"); if (ignore_max_footprint_) { SetIdealFootprint(std::numeric_limits<size_t>::max()); concurrent_start_bytes_ = std::numeric_limits<size_t>::max(); @@ -719,8 +716,8 @@ void Heap::VisitObjects(ObjectCallback callback, void* arg) { mirror::Object* obj = *it; if (obj != nullptr && obj->GetClass() != nullptr) { // Avoid the race condition caused by the object not yet being written into the allocation - // stack or the class not yet being written in the object. Or, if kUseThreadLocalAllocationStack, - // there can be nulls on the allocation stack. + // stack or the class not yet being written in the object. Or, if + // kUseThreadLocalAllocationStack, there can be nulls on the allocation stack. 
callback(obj, arg); } } @@ -872,8 +869,7 @@ Heap::~Heap() { STLDeleteElements(&continuous_spaces_); STLDeleteElements(&discontinuous_spaces_); delete gc_complete_lock_; - delete gc_request_lock_; - delete heap_trim_request_lock_; + delete pending_task_lock_; VLOG(heap) << "Finished ~Heap()"; } @@ -944,37 +940,23 @@ void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType self->ThrowOutOfMemoryError(oss.str().c_str()); } -void Heap::DoPendingTransitionOrTrim() { - Thread* self = Thread::Current(); - CollectorType desired_collector_type; - // Wait until we reach the desired transition time. - while (true) { - uint64_t wait_time; - { - MutexLock mu(self, *heap_trim_request_lock_); - desired_collector_type = desired_collector_type_; - uint64_t current_time = NanoTime(); - if (current_time >= heap_transition_or_trim_target_time_) { - break; - } - wait_time = heap_transition_or_trim_target_time_ - current_time; - } - ScopedThreadStateChange tsc(self, kSleeping); - usleep(wait_time / 1000); // Usleep takes microseconds. - } +void Heap::DoPendingCollectorTransition() { + CollectorType desired_collector_type = desired_collector_type_; // Launch homogeneous space compaction if it is desired. if (desired_collector_type == kCollectorTypeHomogeneousSpaceCompact) { if (!CareAboutPauseTimes()) { PerformHomogeneousSpaceCompact(); + } else { + VLOG(gc) << "Homogeneous compaction ignored due to jank perceptible process state"; } - // No need to Trim(). Homogeneous space compaction may free more virtual and physical memory. - desired_collector_type = collector_type_; - return; + } else { + TransitionCollector(desired_collector_type); } - // Transition the collector if the desired collector type is not the same as the current - // collector type. - TransitionCollector(desired_collector_type); +} + +void Heap::Trim(Thread* self) { if (!CareAboutPauseTimes()) { + ATRACE_BEGIN("Deflating monitors"); // Deflate the monitors, this can cause a pause but shouldn't matter since we don't care // about pauses. Runtime* runtime = Runtime::Current(); @@ -984,9 +966,10 @@ void Heap::DoPendingTransitionOrTrim() { VLOG(heap) << "Deflating " << count << " monitors took " << PrettyDuration(NanoTime() - start_time); runtime->GetThreadList()->ResumeAll(); + ATRACE_END(); } - // Do a heap trim if it is needed. - Trim(); + TrimIndirectReferenceTables(self); + TrimSpaces(self); } class TrimIndirectReferenceTableClosure : public Closure { @@ -1004,17 +987,22 @@ class TrimIndirectReferenceTableClosure : public Closure { Barrier* const barrier_; }; - -void Heap::Trim() { - Thread* self = Thread::Current(); - { - MutexLock mu(self, *heap_trim_request_lock_); - if (!heap_trim_request_pending_ || last_trim_time_ + kHeapTrimWait >= NanoTime()) { - return; - } - last_trim_time_ = NanoTime(); - heap_trim_request_pending_ = false; - } +void Heap::TrimIndirectReferenceTables(Thread* self) { + ScopedObjectAccess soa(self); + ATRACE_BEGIN(__FUNCTION__); + JavaVMExt* vm = soa.Vm(); + // Trim globals indirect reference table. + vm->TrimGlobals(); + // Trim locals indirect reference tables. 
+ Barrier barrier(0); + TrimIndirectReferenceTableClosure closure(&barrier); + ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun); + size_t barrier_count = Runtime::Current()->GetThreadList()->RunCheckpoint(&closure); + barrier.Increment(self, barrier_count); + ATRACE_END(); +} + +void Heap::TrimSpaces(Thread* self) { { // Need to do this before acquiring the locks since we don't want to get suspended while // holding any locks. @@ -1026,20 +1014,8 @@ void Heap::Trim() { WaitForGcToCompleteLocked(kGcCauseTrim, self); collector_type_running_ = kCollectorTypeHeapTrim; } - // Trim reference tables. - { - ScopedObjectAccess soa(self); - JavaVMExt* vm = soa.Vm(); - // Trim globals indirect reference table. - vm->TrimGlobals(); - // Trim locals indirect reference tables. - Barrier barrier(0); - TrimIndirectReferenceTableClosure closure(&barrier); - ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun); - size_t barrier_count = Runtime::Current()->GetThreadList()->RunCheckpoint(&closure); - barrier.Increment(self, barrier_count); - } - uint64_t start_ns = NanoTime(); + ATRACE_BEGIN(__FUNCTION__); + const uint64_t start_ns = NanoTime(); // Trim the managed spaces. uint64_t total_alloc_space_allocated = 0; uint64_t total_alloc_space_size = 0; @@ -1089,6 +1065,7 @@ void Heap::Trim() { << PrettyDuration(end_ns - gc_heap_end_ns) << ", advised=" << PrettySize(native_reclaimed) << ") heaps. Managed heap utilization of " << static_cast<int>(100 * managed_utilization) << "%."; + ATRACE_END(); } bool Heap::IsValidObjectAddress(const mirror::Object* obj) const { @@ -1639,7 +1616,6 @@ HomogeneousSpaceCompactResult Heap::PerformHomogeneousSpaceCompact() { return HomogeneousSpaceCompactResult::kSuccess; } - void Heap::TransitionCollector(CollectorType collector_type) { if (collector_type == collector_type_) { return; @@ -2130,7 +2106,9 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCaus ScopedThreadStateChange tsc(self, kWaitingPerformingGc); Locks::mutator_lock_->AssertNotHeld(self); if (self->IsHandlingStackOverflow()) { - LOG(WARNING) << "Performing GC on a thread that is handling a stack overflow."; + // If we are throwing a stack overflow error we probably don't have enough remaining stack + // space to run the GC. + return collector::kGcTypeNone; } bool compacting_gc; { @@ -2207,7 +2185,7 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCaus collector->Run(gc_cause, clear_soft_references || runtime->IsZygote()); total_objects_freed_ever_ += GetCurrentGcIteration()->GetFreedObjects(); total_bytes_freed_ever_ += GetCurrentGcIteration()->GetFreedBytes(); - RequestHeapTrim(); + RequestTrim(self); // Enqueue cleared references. reference_processor_.EnqueueClearedReferences(self); // Grow the heap so that we know when to perform the next GC. @@ -3032,52 +3010,109 @@ void Heap::RequestConcurrentGCAndSaveObject(Thread* self, mirror::Object** obj) RequestConcurrentGC(self); } -void Heap::RequestConcurrentGC(Thread* self) { - // Make sure that we can do a concurrent GC. 
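From here on the old request/signal plumbing (heap_trim_request_lock_, gc_request_cond_, SignalHeapTrimDaemon) is replaced by small HeapTask objects such as ConcurrentGCTask, CollectorTransitionTask, and HeapTrimTask, queued on the new TaskProcessor and handed out once their target run time arrives. A compact sketch of that queue, using standard-library primitives and invented names instead of ART's Mutex, ConditionVariable, and HeapTask:

    #include <chrono>
    #include <condition_variable>
    #include <cstdint>
    #include <mutex>
    #include <set>

    struct SketchTask {
      explicit SketchTask(uint64_t target_ns) : target_time_ns(target_ns) {}
      virtual ~SketchTask() {}
      virtual void Run() = 0;
      uint64_t target_time_ns;  // Earliest time at which the task should run.
    };

    class SketchTaskProcessor {
     public:
      void AddTask(SketchTask* task) {
        std::lock_guard<std::mutex> lock(mutex_);
        tasks_.insert(task);
        cond_.notify_one();
      }

      // Blocks until the earliest task is due (or the processor is stopped) and
      // returns it; returns nullptr once stopped and drained.
      SketchTask* GetTask() {
        std::unique_lock<std::mutex> lock(mutex_);
        while (true) {
          if (tasks_.empty()) {
            if (!is_running_) {
              return nullptr;
            }
            cond_.wait(lock);
          } else {
            SketchTask* task = *tasks_.begin();
            const uint64_t now = NowNs();
            if (!is_running_ || task->target_time_ns <= now) {
              tasks_.erase(tasks_.begin());
              return task;
            }
            cond_.wait_for(lock, std::chrono::nanoseconds(task->target_time_ns - now));
          }
        }
      }

      void Start() { std::lock_guard<std::mutex> lock(mutex_); is_running_ = true; }

      // Stop() makes GetTask() hand out the remaining tasks immediately and then
      // return nullptr, mirroring how TaskProcessor::Stop lets RunAllTasks drain.
      void Stop() {
        std::lock_guard<std::mutex> lock(mutex_);
        is_running_ = false;
        cond_.notify_all();
      }

     private:
      static uint64_t NowNs() {
        return std::chrono::duration_cast<std::chrono::nanoseconds>(
            std::chrono::steady_clock::now().time_since_epoch()).count();
      }
      struct ByTargetTime {
        bool operator()(const SketchTask* a, const SketchTask* b) const {
          return a->target_time_ns < b->target_time_ns;
        }
      };
      std::mutex mutex_;
      std::condition_variable cond_;
      bool is_running_ = false;
      std::multiset<SketchTask*, ByTargetTime> tasks_;  // Ordered by target time.
    };

In the hunks that follow, RequestConcurrentGC guards against queueing duplicate GC tasks with an Atomic<bool> compare-and-exchange, RequestCollectorTransition updates the target time of an already-pending transition instead of adding a second task, and the cleared-reference enqueue in reference_processor.cc becomes a ClearedReferenceTask that can run inline or on the same queue. The queue itself is drained by whatever calls TaskProcessor::RunAllTasks, exposed to the Java daemons through the new VMRuntime natives further down and exercised from a ThreadPool in task_processor_test.cc.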
+class Heap::ConcurrentGCTask : public HeapTask { + public: + explicit ConcurrentGCTask(uint64_t target_time) : HeapTask(target_time) { } + virtual void Run(Thread* self) OVERRIDE { + gc::Heap* heap = Runtime::Current()->GetHeap(); + heap->ConcurrentGC(self); + heap->ClearConcurrentGCRequest(); + } +}; + +static bool CanAddHeapTask(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_) { Runtime* runtime = Runtime::Current(); - if (runtime == nullptr || !runtime->IsFinishedStarting() || runtime->IsShuttingDown(self) || - self->IsHandlingStackOverflow()) { - return; + return runtime != nullptr && runtime->IsFinishedStarting() && !runtime->IsShuttingDown(self) && + !self->IsHandlingStackOverflow(); +} + +void Heap::ClearConcurrentGCRequest() { + concurrent_gc_pending_.StoreRelaxed(false); +} + +void Heap::RequestConcurrentGC(Thread* self) { + if (CanAddHeapTask(self) && + concurrent_gc_pending_.CompareExchangeStrongSequentiallyConsistent(false, true)) { + task_processor_->AddTask(self, new ConcurrentGCTask(NanoTime())); // Start straight away. } - NotifyConcurrentGCRequest(self); } void Heap::ConcurrentGC(Thread* self) { - if (Runtime::Current()->IsShuttingDown(self)) { - return; - } - // Wait for any GCs currently running to finish. - if (WaitForGcToComplete(kGcCauseBackground, self) == collector::kGcTypeNone) { - // If the we can't run the GC type we wanted to run, find the next appropriate one and try that - // instead. E.g. can't do partial, so do full instead. - if (CollectGarbageInternal(next_gc_type_, kGcCauseBackground, false) == - collector::kGcTypeNone) { - for (collector::GcType gc_type : gc_plan_) { - // Attempt to run the collector, if we succeed, we are done. - if (gc_type > next_gc_type_ && - CollectGarbageInternal(gc_type, kGcCauseBackground, false) != collector::kGcTypeNone) { - break; + if (!Runtime::Current()->IsShuttingDown(self)) { + // Wait for any GCs currently running to finish. + if (WaitForGcToComplete(kGcCauseBackground, self) == collector::kGcTypeNone) { + // If the we can't run the GC type we wanted to run, find the next appropriate one and try that + // instead. E.g. can't do partial, so do full instead. + if (CollectGarbageInternal(next_gc_type_, kGcCauseBackground, false) == + collector::kGcTypeNone) { + for (collector::GcType gc_type : gc_plan_) { + // Attempt to run the collector, if we succeed, we are done. 
+ if (gc_type > next_gc_type_ && + CollectGarbageInternal(gc_type, kGcCauseBackground, false) != + collector::kGcTypeNone) { + break; + } } } } } } +class Heap::CollectorTransitionTask : public HeapTask { + public: + explicit CollectorTransitionTask(uint64_t target_time) : HeapTask(target_time) { } + virtual void Run(Thread* self) OVERRIDE { + gc::Heap* heap = Runtime::Current()->GetHeap(); + heap->DoPendingCollectorTransition(); + heap->ClearPendingCollectorTransition(self); + } +}; + +void Heap::ClearPendingCollectorTransition(Thread* self) { + MutexLock mu(self, *pending_task_lock_); + pending_collector_transition_ = nullptr; +} + void Heap::RequestCollectorTransition(CollectorType desired_collector_type, uint64_t delta_time) { Thread* self = Thread::Current(); + desired_collector_type_ = desired_collector_type; + if (desired_collector_type_ == collector_type_ || !CanAddHeapTask(self)) { + return; + } + CollectorTransitionTask* added_task = nullptr; + const uint64_t target_time = NanoTime() + delta_time; { - MutexLock mu(self, *heap_trim_request_lock_); - if (desired_collector_type_ == desired_collector_type) { + MutexLock mu(self, *pending_task_lock_); + // If we have an existing collector transition, update the targe time to be the new target. + if (pending_collector_transition_ != nullptr) { + task_processor_->UpdateTargetRunTime(self, pending_collector_transition_, target_time); return; } - heap_transition_or_trim_target_time_ = - std::max(heap_transition_or_trim_target_time_, NanoTime() + delta_time); - desired_collector_type_ = desired_collector_type; + added_task = new CollectorTransitionTask(target_time); + pending_collector_transition_ = added_task; + } + task_processor_->AddTask(self, added_task); +} + +class Heap::HeapTrimTask : public HeapTask { + public: + explicit HeapTrimTask(uint64_t delta_time) : HeapTask(NanoTime() + delta_time) { } + virtual void Run(Thread* self) OVERRIDE { + gc::Heap* heap = Runtime::Current()->GetHeap(); + heap->Trim(self); + heap->ClearPendingTrim(self); } - SignalHeapTrimDaemon(self); +}; + +void Heap::ClearPendingTrim(Thread* self) { + MutexLock mu(self, *pending_task_lock_); + pending_heap_trim_ = nullptr; } -void Heap::RequestHeapTrim() { +void Heap::RequestTrim(Thread* self) { + if (!CanAddHeapTask(self)) { + return; + } // GC completed and now we must decide whether to request a heap trim (advising pages back to the // kernel) or not. Issuing a request will also cause trimming of the libc heap. As a trim scans // a space it will hold its lock and can become a cause of jank. @@ -3090,42 +3125,17 @@ void Heap::RequestHeapTrim() { // to utilization (which is probably inversely proportional to how much benefit we can expect). // We could try mincore(2) but that's only a measure of how many pages we haven't given away, // not how much use we're making of those pages. - - Thread* self = Thread::Current(); - Runtime* runtime = Runtime::Current(); - if (runtime == nullptr || !runtime->IsFinishedStarting() || runtime->IsShuttingDown(self) || - runtime->IsZygote()) { - // Ignore the request if we are the zygote to prevent app launching lag due to sleep in heap - // trimmer daemon. b/17310019 - // Heap trimming isn't supported without a Java runtime or Daemons (such as at dex2oat time) - // Also: we do not wish to start a heap trim if the runtime is shutting down (a racy check - // as we don't hold the lock while requesting the trim). 
- return; - } + HeapTrimTask* added_task = nullptr; { - MutexLock mu(self, *heap_trim_request_lock_); - if (last_trim_time_ + kHeapTrimWait >= NanoTime()) { - // We have done a heap trim in the last kHeapTrimWait nanosecs, don't request another one - // just yet. + MutexLock mu(self, *pending_task_lock_); + if (pending_heap_trim_ != nullptr) { + // Already have a heap trim request in task processor, ignore this request. return; } - heap_trim_request_pending_ = true; - uint64_t current_time = NanoTime(); - if (heap_transition_or_trim_target_time_ < current_time) { - heap_transition_or_trim_target_time_ = current_time + kHeapTrimWait; - } + added_task = new HeapTrimTask(kHeapTrimWait); + pending_heap_trim_ = added_task; } - // Notify the daemon thread which will actually do the heap trim. - SignalHeapTrimDaemon(self); -} - -void Heap::SignalHeapTrimDaemon(Thread* self) { - JNIEnv* env = self->GetJniEnv(); - DCHECK(WellKnownClasses::java_lang_Daemons != nullptr); - DCHECK(WellKnownClasses::java_lang_Daemons_requestHeapTrim != nullptr); - env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons, - WellKnownClasses::java_lang_Daemons_requestHeapTrim); - CHECK(!env->ExceptionCheck()); + task_processor_->AddTask(self, added_task); } void Heap::RevokeThreadLocalBuffers(Thread* thread) { @@ -3153,7 +3163,7 @@ void Heap::RevokeAllThreadLocalBuffers() { } bool Heap::IsGCRequestPending() const { - return concurrent_start_bytes_ != std::numeric_limits<size_t>::max(); + return concurrent_gc_pending_.LoadRelaxed(); } void Heap::RunFinalization(JNIEnv* env) { @@ -3235,7 +3245,7 @@ void Heap::AddModUnionTable(accounting::ModUnionTable* mod_union_table) { } void Heap::CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count) { - CHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) || + CHECK(c == nullptr || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) || (c->IsVariableSize() || c->GetObjectSize() == byte_count)); CHECK_GE(byte_count, sizeof(mirror::Object)); } @@ -3272,25 +3282,5 @@ void Heap::ClearMarkedObjects() { } } -void Heap::WaitForConcurrentGCRequest(Thread* self) { - ScopedThreadStateChange tsc(self, kBlocked); - MutexLock mu(self, *gc_request_lock_); - conc_gc_running_ = false; - while (!gc_request_pending_) { - gc_request_cond_->Wait(self); - } - gc_request_pending_ = false; - conc_gc_running_ = true; -} - -void Heap::NotifyConcurrentGCRequest(Thread* self) { - ScopedThreadStateChange tsc(self, kBlocked); - MutexLock mu(self, *gc_request_lock_); - if (!conc_gc_running_) { - gc_request_pending_ = true; - gc_request_cond_->Signal(self); - } -} - } // namespace gc } // namespace art diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index cf94eb6a9d..1738124c0c 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -57,6 +57,7 @@ namespace mirror { namespace gc { class ReferenceProcessor; +class TaskProcessor; namespace accounting { class HeapBitmap; @@ -470,11 +471,11 @@ class Heap { void DumpForSigQuit(std::ostream& os); - // Do a pending heap transition or trim. - void DoPendingTransitionOrTrim() LOCKS_EXCLUDED(heap_trim_request_lock_); + // Do a pending collector transition. + void DoPendingCollectorTransition(); - // Trim the managed and native heaps by releasing unused memory back to the OS. - void Trim() LOCKS_EXCLUDED(heap_trim_request_lock_); + // Deflate monitors, ... and trim the spaces. 
+ void Trim(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_); void RevokeThreadLocalBuffers(Thread* thread); void RevokeRosAllocThreadLocalBuffers(Thread* thread); @@ -606,15 +607,25 @@ class Heap { ReferenceProcessor* GetReferenceProcessor() { return &reference_processor_; } + TaskProcessor* GetTaskProcessor() { + return task_processor_.get(); + } bool HasZygoteSpace() const { return zygote_space_ != nullptr; } - void WaitForConcurrentGCRequest(Thread* self) LOCKS_EXCLUDED(gc_request_lock_); - void NotifyConcurrentGCRequest(Thread* self) LOCKS_EXCLUDED(gc_request_lock_); + // Request an asynchronous trim. + void RequestTrim(Thread* self) LOCKS_EXCLUDED(pending_task_lock_); + + // Request asynchronous GC. + void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(pending_task_lock_); private: + class ConcurrentGCTask; + class CollectorTransitionTask; + class HeapTrimTask; + // Compact source space to target space. void Compact(space::ContinuousMemMapAllocSpace* target_space, space::ContinuousMemMapAllocSpace* source_space, @@ -705,12 +716,10 @@ class Heap { EXCLUSIVE_LOCKS_REQUIRED(gc_complete_lock_); void RequestCollectorTransition(CollectorType desired_collector_type, uint64_t delta_time) - LOCKS_EXCLUDED(heap_trim_request_lock_); - void RequestHeapTrim() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_); + LOCKS_EXCLUDED(pending_task_lock_); + void RequestConcurrentGCAndSaveObject(Thread* self, mirror::Object** obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void RequestConcurrentGC(Thread* self) - LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_); bool IsGCRequestPending() const; // Sometimes CollectGarbageInternal decides to run a different Gc than you requested. Returns @@ -771,10 +780,6 @@ class Heap { // Clear cards and update the mod union table. void ProcessCards(TimingLogger* timings, bool use_rem_sets); - // Signal the heap trim daemon that there is something to do, either a heap transition or heap - // trim. - void SignalHeapTrimDaemon(Thread* self); - // Push an object onto the allocation stack. void PushOnAllocationStack(Thread* self, mirror::Object** obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -783,12 +788,22 @@ class Heap { void PushOnThreadLocalAllocationStackWithInternalGC(Thread* thread, mirror::Object** obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void ClearConcurrentGCRequest(); + void ClearPendingTrim(Thread* self) LOCKS_EXCLUDED(pending_task_lock_); + void ClearPendingCollectorTransition(Thread* self) LOCKS_EXCLUDED(pending_task_lock_); + // What kind of concurrency behavior is the runtime after? Currently true for concurrent mark // sweep GC, false for other GC types. bool IsGcConcurrent() const ALWAYS_INLINE { return collector_type_ == kCollectorTypeCMS || collector_type_ == kCollectorTypeCC; } + // Trim the managed and native spaces by releasing unused memory back to the OS. + void TrimSpaces(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_); + + // Trim 0 pages at the end of reference tables. + void TrimIndirectReferenceTables(Thread* self); + // All-known continuous spaces, where objects lie within fixed bounds. std::vector<space::ContinuousSpace*> continuous_spaces_; @@ -835,14 +850,8 @@ class Heap { // Desired collector type, heap trimming daemon transitions the heap if it is != collector_type_. CollectorType desired_collector_type_; - // Lock which guards heap trim requests. - Mutex* heap_trim_request_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - // When we want to perform the next heap trim (nano seconds). 
- uint64_t last_trim_time_ GUARDED_BY(heap_trim_request_lock_); - // When we want to perform the next heap transition (nano seconds) or heap trim. - uint64_t heap_transition_or_trim_target_time_ GUARDED_BY(heap_trim_request_lock_); - // If we have a heap trim request pending. - bool heap_trim_request_pending_ GUARDED_BY(heap_trim_request_lock_); + // Lock which guards pending tasks. + Mutex* pending_task_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; // How many GC threads we may use for paused parts of garbage collection. const size_t parallel_gc_threads_; @@ -879,15 +888,12 @@ class Heap { Mutex* gc_complete_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; std::unique_ptr<ConditionVariable> gc_complete_cond_ GUARDED_BY(gc_complete_lock_); - // Guards concurrent GC requests. - Mutex* gc_request_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - std::unique_ptr<ConditionVariable> gc_request_cond_ GUARDED_BY(gc_request_lock_); - bool gc_request_pending_ GUARDED_BY(gc_request_lock_); - bool conc_gc_running_ GUARDED_BY(gc_request_lock_); - // Reference processor; ReferenceProcessor reference_processor_; + // Task processor, proxies heap trim requests to the daemon threads. + std::unique_ptr<TaskProcessor> task_processor_; + // True while the garbage collector is running. volatile CollectorType collector_type_running_ GUARDED_BY(gc_complete_lock_); @@ -1060,9 +1066,17 @@ class Heap { // Count for performed homogeneous space compaction. Atomic<size_t> count_performed_homogeneous_space_compaction_; + // Whether or not a concurrent GC is pending. + Atomic<bool> concurrent_gc_pending_; + + // Active tasks which we can modify (change target time, desired collector type, etc..). + CollectorTransitionTask* pending_collector_transition_ GUARDED_BY(pending_task_lock_); + HeapTrimTask* pending_heap_trim_ GUARDED_BY(pending_task_lock_); + // Whether or not we use homogeneous space compaction to avoid OOM errors. bool use_homogeneous_space_compaction_for_oom_; + friend class CollectorTransitionTask; friend class collector::GarbageCollector; friend class collector::MarkCompact; friend class collector::MarkSweep; diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc index 99bd63fa8a..01e8795669 100644 --- a/runtime/gc/reference_processor.cc +++ b/runtime/gc/reference_processor.cc @@ -23,11 +23,14 @@ #include "reflection.h" #include "ScopedLocalRef.h" #include "scoped_thread_state_change.h" +#include "task_processor.h" #include "well_known_classes.h" namespace art { namespace gc { +static constexpr bool kAsyncReferenceQueueAdd = false; + ReferenceProcessor::ReferenceProcessor() : process_references_args_(nullptr, nullptr, nullptr), preserving_references_(false), @@ -213,17 +216,43 @@ void ReferenceProcessor::UpdateRoots(IsMarkedCallback* callback, void* arg) { cleared_references_.UpdateRoots(callback, arg); } +class ClearedReferenceTask : public HeapTask { + public: + explicit ClearedReferenceTask(jobject cleared_references) + : HeapTask(NanoTime()), cleared_references_(cleared_references) { + } + virtual void Run(Thread* thread) { + ScopedObjectAccess soa(thread); + jvalue args[1]; + args[0].l = cleared_references_; + InvokeWithJValues(soa, nullptr, WellKnownClasses::java_lang_ref_ReferenceQueue_add, args); + soa.Env()->DeleteGlobalRef(cleared_references_); + } + + private: + const jobject cleared_references_; +}; + void ReferenceProcessor::EnqueueClearedReferences(Thread* self) { Locks::mutator_lock_->AssertNotHeld(self); + // When a runtime isn't started there are no reference queues to care about so ignore. 
if (!cleared_references_.IsEmpty()) { - // When a runtime isn't started there are no reference queues to care about so ignore. if (LIKELY(Runtime::Current()->IsStarted())) { - ScopedObjectAccess soa(self); - ScopedLocalRef<jobject> arg(self->GetJniEnv(), - soa.AddLocalReference<jobject>(cleared_references_.GetList())); - jvalue args[1]; - args[0].l = arg.get(); - InvokeWithJValues(soa, nullptr, WellKnownClasses::java_lang_ref_ReferenceQueue_add, args); + jobject cleared_references; + { + ReaderMutexLock mu(self, *Locks::mutator_lock_); + cleared_references = self->GetJniEnv()->vm->AddGlobalRef( + self, cleared_references_.GetList()); + } + if (kAsyncReferenceQueueAdd) { + // TODO: This can cause RunFinalization to terminate before newly freed objects are + // finalized since they may not be enqueued by the time RunFinalization starts. + Runtime::Current()->GetHeap()->GetTaskProcessor()->AddTask( + self, new ClearedReferenceTask(cleared_references)); + } else { + ClearedReferenceTask task(cleared_references); + task.Run(self); + } } cleared_references_.Clear(); } diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc index 74d1a2b7db..ced25a40bb 100644 --- a/runtime/gc/space/rosalloc_space.cc +++ b/runtime/gc/space/rosalloc_space.cc @@ -365,8 +365,9 @@ void RosAllocSpace::Clear() { mark_bitmap_->Clear(); SetEnd(begin_ + starting_size_); delete rosalloc_; - rosalloc_ = CreateRosAlloc(mem_map_->Begin(), starting_size_, initial_size_, Capacity(), - low_memory_mode_, Runtime::Current()->RunningOnValgrind()); + rosalloc_ = CreateRosAlloc(mem_map_->Begin(), starting_size_, initial_size_, + NonGrowthLimitCapacity(), low_memory_mode_, + Runtime::Current()->RunningOnValgrind()); SetFootprintLimit(footprint_limit); } diff --git a/runtime/gc/task_processor.cc b/runtime/gc/task_processor.cc new file mode 100644 index 0000000000..1a3c6f5399 --- /dev/null +++ b/runtime/gc/task_processor.cc @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "task_processor.h" + +#include "scoped_thread_state_change.h" + +namespace art { +namespace gc { + +TaskProcessor::TaskProcessor() + : lock_(new Mutex("Task processor lock", kReferenceProcessorLock)), is_running_(false) { + // Piggyback off the reference processor lock level. + cond_.reset(new ConditionVariable("Task processor condition", *lock_)); +} + +TaskProcessor::~TaskProcessor() { + delete lock_; +} + +void TaskProcessor::AddTask(Thread* self, HeapTask* task) { + ScopedThreadStateChange tsc(self, kBlocked); + MutexLock mu(self, *lock_); + tasks_.insert(task); + cond_->Signal(self); +} + +HeapTask* TaskProcessor::GetTask(Thread* self) { + ScopedThreadStateChange tsc(self, kBlocked); + MutexLock mu(self, *lock_); + while (true) { + if (tasks_.empty()) { + if (!is_running_) { + return nullptr; + } + cond_->Wait(self); // Empty queue, wait until we are signalled. 
+ } else { + // Non empty queue, look at the top element and see if we are ready to run it. + const uint64_t current_time = NanoTime(); + HeapTask* task = *tasks_.begin(); + // If we are shutting down, return the task right away without waiting. Otherwise return the + // task if it is late enough. + uint64_t target_time = task->GetTargetRunTime(); + if (!is_running_ || target_time <= current_time) { + tasks_.erase(tasks_.begin()); + return task; + } + DCHECK_GT(target_time, current_time); + // Wait untl we hit the target run time. + const uint64_t delta_time = target_time - current_time; + const uint64_t ms_delta = NsToMs(delta_time); + const uint64_t ns_delta = delta_time - MsToNs(ms_delta); + cond_->TimedWait(self, static_cast<int64_t>(ms_delta), static_cast<int32_t>(ns_delta)); + } + } + UNREACHABLE(); + return nullptr; +} + +void TaskProcessor::UpdateTargetRunTime(Thread* self, HeapTask* task, uint64_t new_target_time) { + MutexLock mu(self, *lock_); + // Find the task. + auto range = tasks_.equal_range(task); + for (auto it = range.first; it != range.second; ++it) { + if (*it == task) { + // Check if the target time was updated, if so re-insert then wait. + if (new_target_time != task->GetTargetRunTime()) { + tasks_.erase(it); + task->SetTargetRunTime(new_target_time); + tasks_.insert(task); + // If we became the first task then we may need to signal since we changed the task that we + // are sleeping on. + if (*tasks_.begin() == task) { + cond_->Signal(self); + } + return; + } + } + } +} + +bool TaskProcessor::IsRunning() const { + MutexLock mu(Thread::Current(), *lock_); + return is_running_; +} + +void TaskProcessor::Stop(Thread* self) { + MutexLock mu(self, *lock_); + is_running_ = false; + cond_->Broadcast(self); +} + +void TaskProcessor::Start(Thread* self) { + MutexLock mu(self, *lock_); + is_running_ = true; +} + +void TaskProcessor::RunAllTasks(Thread* self) { + while (true) { + // Wait and get a task, may be interrupted. + HeapTask* task = GetTask(self); + if (task != nullptr) { + task->Run(self); + task->Finalize(); + } else if (!IsRunning()) { + break; + } + } +} + +} // namespace gc +} // namespace art diff --git a/runtime/gc/task_processor.h b/runtime/gc/task_processor.h new file mode 100644 index 0000000000..765f03557e --- /dev/null +++ b/runtime/gc/task_processor.h @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_RUNTIME_GC_TASK_PROCESSOR_H_ +#define ART_RUNTIME_GC_TASK_PROCESSOR_H_ + +#include <memory> +#include <set> + +#include "base/mutex.h" +#include "globals.h" +#include "thread_pool.h" + +namespace art { +namespace gc { + +class HeapTask : public SelfDeletingTask { + public: + explicit HeapTask(uint64_t target_run_time) : target_run_time_(target_run_time) { + } + uint64_t GetTargetRunTime() const { + return target_run_time_; + } + + private: + // Update the updated_target_run_time_, the task processor will re-insert the task when it is + // popped and update the target_run_time_. + void SetTargetRunTime(uint64_t new_target_run_time) { + target_run_time_ = new_target_run_time; + } + + // Time in ns at which we want the task to run. + uint64_t target_run_time_; + + friend class TaskProcessor; +}; + +// Used to process GC tasks (heap trim, heap transitions, concurrent GC). +class TaskProcessor { + public: + TaskProcessor(); + virtual ~TaskProcessor(); + void AddTask(Thread* self, HeapTask* task) LOCKS_EXCLUDED(lock_); + HeapTask* GetTask(Thread* self) LOCKS_EXCLUDED(lock_); + void Start(Thread* self) LOCKS_EXCLUDED(lock_); + // Stop tells the RunAllTasks to finish up the remaining tasks as soon as + // possible then return. + void Stop(Thread* self) LOCKS_EXCLUDED(lock_); + void RunAllTasks(Thread* self) LOCKS_EXCLUDED(lock_); + bool IsRunning() const LOCKS_EXCLUDED(lock_); + void UpdateTargetRunTime(Thread* self, HeapTask* target_time, uint64_t new_target_time) + LOCKS_EXCLUDED(lock_); + + private: + class CompareByTargetRunTime { + public: + bool operator()(const HeapTask* a, const HeapTask* b) const { + return a->GetTargetRunTime() < b->GetTargetRunTime(); + } + }; + + mutable Mutex* lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + bool is_running_ GUARDED_BY(lock_); + std::unique_ptr<ConditionVariable> cond_ GUARDED_BY(lock_); + std::multiset<HeapTask*, CompareByTargetRunTime> tasks_ GUARDED_BY(lock_); +}; + +} // namespace gc +} // namespace art + +#endif // ART_RUNTIME_GC_TASK_PROCESSOR_H_ diff --git a/runtime/gc/task_processor_test.cc b/runtime/gc/task_processor_test.cc new file mode 100644 index 0000000000..5dd6d8fb7b --- /dev/null +++ b/runtime/gc/task_processor_test.cc @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common_runtime_test.h" +#include "task_processor.h" +#include "thread_pool.h" +#include "thread-inl.h" +#include "utils.h" + +namespace art { +namespace gc { + +class TaskProcessorTest : public CommonRuntimeTest { + public: +}; + +class RecursiveTask : public HeapTask { + public: + RecursiveTask(TaskProcessor* task_processor, Atomic<size_t>* counter, size_t max_recursion) + : HeapTask(NanoTime() + MsToNs(10)), task_processor_(task_processor), counter_(counter), + max_recursion_(max_recursion) { + } + virtual void Run(Thread* self) OVERRIDE { + if (max_recursion_ > 0) { + task_processor_->AddTask(self, + new RecursiveTask(task_processor_, counter_, max_recursion_ - 1)); + counter_->FetchAndAddSequentiallyConsistent(1U); + } + } + + private: + TaskProcessor* const task_processor_; + Atomic<size_t>* const counter_; + const size_t max_recursion_; +}; + +class WorkUntilDoneTask : public SelfDeletingTask { + public: + WorkUntilDoneTask(TaskProcessor* task_processor, Atomic<bool>* done_running) + : task_processor_(task_processor), done_running_(done_running) { + } + virtual void Run(Thread* self) OVERRIDE { + task_processor_->RunAllTasks(self); + done_running_->StoreSequentiallyConsistent(true); + } + + private: + TaskProcessor* const task_processor_; + Atomic<bool>* done_running_; +}; + +TEST_F(TaskProcessorTest, Interrupt) { + ThreadPool thread_pool("task processor test", 1U); + Thread* const self = Thread::Current(); + TaskProcessor task_processor; + static constexpr size_t kRecursion = 10; + Atomic<bool> done_running(false); + Atomic<size_t> counter(0); + task_processor.AddTask(self, new RecursiveTask(&task_processor, &counter, kRecursion)); + task_processor.Start(self); + // Add a task which will wait until interrupted to the thread pool. + thread_pool.AddTask(self, new WorkUntilDoneTask(&task_processor, &done_running)); + thread_pool.StartWorkers(self); + ASSERT_FALSE(done_running); + // Wait until all the tasks are done, but since we didn't interrupt, done_running should be 0. + while (counter.LoadSequentiallyConsistent() != kRecursion) { + usleep(10); + } + ASSERT_FALSE(done_running); + task_processor.Stop(self); + thread_pool.Wait(self, true, false); + // After the interrupt and wait, the WorkUntilInterruptedTasktask should have terminated and + // set done_running_ to true. + ASSERT_TRUE(done_running.LoadSequentiallyConsistent()); + + // Test that we finish remaining tasks before returning from RunTasksUntilInterrupted. + counter.StoreSequentiallyConsistent(0); + done_running.StoreSequentiallyConsistent(false); + // Self interrupt before any of the other tasks run, but since we added them we should keep on + // working until all the tasks are completed. + task_processor.Stop(self); + task_processor.AddTask(self, new RecursiveTask(&task_processor, &counter, kRecursion)); + thread_pool.AddTask(self, new WorkUntilDoneTask(&task_processor, &done_running)); + thread_pool.StartWorkers(self); + thread_pool.Wait(self, true, false); + ASSERT_TRUE(done_running.LoadSequentiallyConsistent()); + ASSERT_EQ(counter.LoadSequentiallyConsistent(), kRecursion); +} + +class TestOrderTask : public HeapTask { + public: + explicit TestOrderTask(uint64_t expected_time, size_t expected_counter, size_t* counter) + : HeapTask(expected_time), expected_counter_(expected_counter), counter_(counter) { + } + virtual void Run(Thread* thread) OVERRIDE { + UNUSED(thread); // Fix cppling bug. 
+ ASSERT_EQ(*counter_, expected_counter_); + ++*counter_; + } + + private: + const size_t expected_counter_; + size_t* const counter_; +}; + +TEST_F(TaskProcessorTest, Ordering) { + static const size_t kNumTasks = 25; + const uint64_t current_time = NanoTime(); + Thread* const self = Thread::Current(); + TaskProcessor task_processor; + task_processor.Stop(self); + size_t counter = 0; + std::vector<std::pair<uint64_t, size_t>> orderings; + for (size_t i = 0; i < kNumTasks; ++i) { + orderings.push_back(std::make_pair(current_time + MsToNs(10U * i), i)); + } + for (size_t i = 0; i < kNumTasks; ++i) { + std::swap(orderings[i], orderings[(i * 87654231 + 12345) % orderings.size()]); + } + for (const auto& pair : orderings) { + auto* task = new TestOrderTask(pair.first, pair.second, &counter); + task_processor.AddTask(self, task); + } + ThreadPool thread_pool("task processor test", 1U); + Atomic<bool> done_running(false); + // Add a task which will wait until interrupted to the thread pool. + thread_pool.AddTask(self, new WorkUntilDoneTask(&task_processor, &done_running)); + ASSERT_FALSE(done_running.LoadSequentiallyConsistent()); + thread_pool.StartWorkers(self); + thread_pool.Wait(self, true, false); + ASSERT_TRUE(done_running.LoadSequentiallyConsistent()); + ASSERT_EQ(counter, kNumTasks); +} + +} // namespace gc +} // namespace art diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc index c6102633be..8fcbf908a2 100644 --- a/runtime/interpreter/interpreter_goto_table_impl.cc +++ b/runtime/interpreter/interpreter_goto_table_impl.cc @@ -543,7 +543,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF HANDLE_INSTRUCTION_START(NEW_ARRAY) { int32_t length = shadow_frame.GetVReg(inst->VRegB_22c(inst_data)); Object* obj = AllocArrayFromCode<do_access_check, true>( - inst->VRegC_22c(), shadow_frame.GetMethod(), length, self, + inst->VRegC_22c(), length, shadow_frame.GetMethod(), self, Runtime::Current()->GetHeap()->GetCurrentAllocator()); if (UNLIKELY(obj == NULL)) { HANDLE_PENDING_EXCEPTION(); diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index 8bbc69481a..38665c7e0b 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -455,7 +455,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, PREAMBLE(); int32_t length = shadow_frame.GetVReg(inst->VRegB_22c(inst_data)); Object* obj = AllocArrayFromCode<do_access_check, true>( - inst->VRegC_22c(), shadow_frame.GetMethod(), length, self, + inst->VRegC_22c(), length, shadow_frame.GetMethod(), self, Runtime::Current()->GetHeap()->GetCurrentAllocator()); if (UNLIKELY(obj == NULL)) { HANDLE_PENDING_EXCEPTION(); diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc index 4797e696d8..37ad46e209 100644 --- a/runtime/jni_internal.cc +++ b/runtime/jni_internal.cc @@ -2256,8 +2256,10 @@ class JNI { java_buffer, WellKnownClasses::java_nio_DirectByteBuffer_capacity)); } - static jobjectRefType GetObjectRefType(JNIEnv* env, jobject java_object) { - CHECK_NON_NULL_ARGUMENT_RETURN(java_object, JNIInvalidRefType); + static jobjectRefType GetObjectRefType(JNIEnv* env ATTRIBUTE_UNUSED, jobject java_object) { + if (java_object == nullptr) { + return JNIInvalidRefType; + } // Do we definitely know what kind of reference this is? 
IndirectRef ref = reinterpret_cast<IndirectRef>(java_object); @@ -2274,7 +2276,7 @@ class JNI { return JNILocalRefType; } LOG(FATAL) << "IndirectRefKind[" << kind << "]"; - return JNIInvalidRefType; + UNREACHABLE(); } private: diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc index 045fe2f811..8e329687c4 100644 --- a/runtime/jni_internal_test.cc +++ b/runtime/jni_internal_test.cc @@ -1300,16 +1300,20 @@ TEST_F(JniInternalTest, GetObjectRefType) { jweak weak_global = env_->NewWeakGlobalRef(local); EXPECT_EQ(JNIWeakGlobalRefType, env_->GetObjectRefType(weak_global)); - CheckJniAbortCatcher jni_abort_catcher; - jobject invalid = reinterpret_cast<jobject>(this); - EXPECT_EQ(JNIInvalidRefType, env_->GetObjectRefType(invalid)); - jni_abort_catcher.Check("use of invalid jobject"); + { + CheckJniAbortCatcher jni_abort_catcher; + jobject invalid = reinterpret_cast<jobject>(this); + EXPECT_EQ(JNIInvalidRefType, env_->GetObjectRefType(invalid)); + jni_abort_catcher.Check("use of invalid jobject"); + } // TODO: invoke a native method and test that its arguments are considered local references. - // Null as object should fail. + // Null as pointer should not fail and return invalid-ref. b/18820997 EXPECT_EQ(JNIInvalidRefType, env_->GetObjectRefType(nullptr)); - jni_abort_catcher.Check("java_object == null"); + + // TODO: Null as reference should return the original type. + // This requires running a GC so a non-null object gets freed. } TEST_F(JniInternalTest, StaleWeakGlobal) { diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc index ae5f60acae..f9c00ce7e9 100644 --- a/runtime/mirror/object_test.cc +++ b/runtime/mirror/object_test.cc @@ -313,7 +313,7 @@ TEST_F(ObjectTest, CheckAndAllocArrayFromCode) { java_lang_dex_file_->GetIndexForStringId(*string_id)); ASSERT_TRUE(type_id != NULL); uint32_t type_idx = java_lang_dex_file_->GetIndexForTypeId(*type_id); - Object* array = CheckAndAllocArrayFromCodeInstrumented(type_idx, sort, 3, Thread::Current(), false, + Object* array = CheckAndAllocArrayFromCodeInstrumented(type_idx, 3, sort, Thread::Current(), false, Runtime::Current()->GetHeap()->GetCurrentAllocator()); EXPECT_TRUE(array->IsArrayInstance()); EXPECT_EQ(3, array->AsArray()->GetLength()); diff --git a/runtime/monitor.cc b/runtime/monitor.cc index 1ef5221627..ef63080649 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -1001,14 +1001,9 @@ void Monitor::VisitLocks(StackVisitor* stack_visitor, void (*callback)(mirror::O // the locks held in this stack frame. std::vector<uint32_t> monitor_enter_dex_pcs; verifier::MethodVerifier::FindLocksAtDexPc(m, dex_pc, &monitor_enter_dex_pcs); - if (monitor_enter_dex_pcs.empty()) { - return; - } - - for (size_t i = 0; i < monitor_enter_dex_pcs.size(); ++i) { + for (uint32_t monitor_dex_pc : monitor_enter_dex_pcs) { // The verifier works in terms of the dex pcs of the monitor-enter instructions. // We want the registers used by those instructions (so we can read the values out of them). - uint32_t monitor_dex_pc = monitor_enter_dex_pcs[i]; uint16_t monitor_enter_instruction = code_item->insns_[monitor_dex_pc]; // Quick sanity check. 
@@ -1018,8 +1013,8 @@ void Monitor::VisitLocks(StackVisitor* stack_visitor, void (*callback)(mirror::O } uint16_t monitor_register = ((monitor_enter_instruction >> 8) & 0xff); - mirror::Object* o = reinterpret_cast<mirror::Object*>(stack_visitor->GetVReg(m, monitor_register, - kReferenceVReg)); + mirror::Object* o = reinterpret_cast<mirror::Object*>( + stack_visitor->GetVReg(m, monitor_register, kReferenceVReg)); callback(o, callback_context); } } diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index f37312efa7..44c6d87da5 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -301,7 +301,10 @@ static jbyte IsDexOptNeededForFile(const std::string& oat_filename, const char* nullptr, false, &error_msg)); if (oat_file.get() == nullptr) { - if (kReasonLogging) { + // Note that even though this is kDexoptNeeded, we use + // kVerboseLogging instead of the usual kReasonLogging since it is + // the common case on first boot and very spammy. + if (kVerboseLogging) { LOG(INFO) << "DexFile_isDexOptNeeded failed to open oat file '" << oat_filename << "' for file location '" << filename << "': " << error_msg; } diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc index a348432340..f503b354f7 100644 --- a/runtime/native/dalvik_system_VMRuntime.cc +++ b/runtime/native/dalvik_system_VMRuntime.cc @@ -34,6 +34,7 @@ #include "gc/heap.h" #include "gc/space/dlmalloc_space.h" #include "gc/space/image_space.h" +#include "gc/task_processor.h" #include "intern_table.h" #include "jni_internal.h" #include "mirror/art_method-inl.h" @@ -213,19 +214,32 @@ static void VMRuntime_updateProcessState(JNIEnv*, jobject, jint process_state) { runtime->UpdateProfilerState(process_state); } -static void VMRuntime_trimHeap(JNIEnv*, jobject) { - Runtime::Current()->GetHeap()->DoPendingTransitionOrTrim(); +static void VMRuntime_trimHeap(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->Trim(ThreadForEnv(env)); } static void VMRuntime_concurrentGC(JNIEnv* env, jobject) { Runtime::Current()->GetHeap()->ConcurrentGC(ThreadForEnv(env)); } +static void VMRuntime_requestHeapTrim(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->RequestTrim(ThreadForEnv(env)); +} + static void VMRuntime_requestConcurrentGC(JNIEnv* env, jobject) { - Runtime::Current()->GetHeap()->NotifyConcurrentGCRequest(ThreadForEnv(env)); + Runtime::Current()->GetHeap()->RequestConcurrentGC(ThreadForEnv(env)); } -static void VMRuntime_waitForConcurrentGCRequest(JNIEnv* env, jobject) { - Runtime::Current()->GetHeap()->WaitForConcurrentGCRequest(ThreadForEnv(env)); + +static void VMRuntime_startHeapTaskProcessor(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->GetTaskProcessor()->Start(ThreadForEnv(env)); +} + +static void VMRuntime_stopHeapTaskProcessor(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->GetTaskProcessor()->Stop(ThreadForEnv(env)); +} + +static void VMRuntime_runHeapTasks(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->GetTaskProcessor()->RunAllTasks(ThreadForEnv(env)); } typedef std::map<std::string, mirror::String*> StringTable; @@ -566,8 +580,6 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(VMRuntime, classPath, "()Ljava/lang/String;"), NATIVE_METHOD(VMRuntime, clearGrowthLimit, "()V"), NATIVE_METHOD(VMRuntime, concurrentGC, "()V"), - NATIVE_METHOD(VMRuntime, requestConcurrentGC, "()V"), - NATIVE_METHOD(VMRuntime, waitForConcurrentGCRequest, "()V"), 
NATIVE_METHOD(VMRuntime, disableJitCompilation, "()V"), NATIVE_METHOD(VMRuntime, getTargetHeapUtilization, "()F"), NATIVE_METHOD(VMRuntime, isDebuggerActive, "!()Z"), @@ -578,8 +590,13 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(VMRuntime, setTargetSdkVersionNative, "(I)V"), NATIVE_METHOD(VMRuntime, registerNativeAllocation, "(I)V"), NATIVE_METHOD(VMRuntime, registerNativeFree, "(I)V"), + NATIVE_METHOD(VMRuntime, requestConcurrentGC, "()V"), + NATIVE_METHOD(VMRuntime, requestHeapTrim, "()V"), + NATIVE_METHOD(VMRuntime, runHeapTasks, "()V"), NATIVE_METHOD(VMRuntime, updateProcessState, "(I)V"), + NATIVE_METHOD(VMRuntime, startHeapTaskProcessor, "()V"), NATIVE_METHOD(VMRuntime, startJitCompilation, "()V"), + NATIVE_METHOD(VMRuntime, stopHeapTaskProcessor, "()V"), NATIVE_METHOD(VMRuntime, trimHeap, "()V"), NATIVE_METHOD(VMRuntime, vmVersion, "()Ljava/lang/String;"), NATIVE_METHOD(VMRuntime, vmLibrary, "()Ljava/lang/String;"), diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc index f1a04cb35d..5f68d6000a 100644 --- a/runtime/native/dalvik_system_ZygoteHooks.cc +++ b/runtime/native/dalvik_system_ZygoteHooks.cc @@ -86,9 +86,15 @@ static void EnableDebugFeatures(uint32_t debug_flags) { } debug_flags &= ~DEBUG_ENABLE_DEBUGGER; - // These two are for backwards compatibility with Dalvik. + if ((debug_flags & DEBUG_ENABLE_SAFEMODE) != 0) { + // Ensure that any (secondary) oat files will be interpreted. + Runtime* runtime = Runtime::Current(); + runtime->AddCompilerOption("--compiler-filter=interpret-only"); + debug_flags &= ~DEBUG_ENABLE_SAFEMODE; + } + + // This is for backwards compatibility with Dalvik. debug_flags &= ~DEBUG_ENABLE_ASSERT; - debug_flags &= ~DEBUG_ENABLE_SAFEMODE; if (debug_flags != 0) { LOG(ERROR) << StringPrintf("Unknown bits set in debug_flags: %#x", debug_flags); diff --git a/runtime/oat.h b/runtime/oat.h index f218482050..8e63d3ae8d 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '0', '4', '5', '\0' }; + static constexpr uint8_t kOatVersion[] = { '0', '5', '3', '\0' }; static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index 90c9fe7c32..3517848927 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -214,7 +214,7 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { Handle<mirror::ArtMethod> h_method(hs.NewHandle(m)); verifier::MethodVerifier verifier(self_, h_dex_cache->GetDexFile(), h_dex_cache, h_class_loader, &m->GetClassDef(), code_item, m->GetDexMethodIndex(), - h_method, m->GetAccessFlags(), false, true, true); + h_method, m->GetAccessFlags(), false, true, true, true); verifier.Verify(); const std::vector<int32_t> kinds(verifier.DescribeVRegs(dex_pc)); for (uint16_t reg = 0; reg < num_regs; ++reg) { diff --git a/runtime/runtime.cc b/runtime/runtime.cc index e91f7c0c90..a2c9f502b9 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -858,6 +858,7 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) CHECK_GE(GetHeap()->GetContinuousSpaces().size(), 1U); class_linker_ = new ClassLinker(intern_table_); + bool options_class_path_used = false; if (GetHeap()->HasImageSpace()) 
{ class_linker_->InitFromImage(); if (kIsDebugBuild) { @@ -881,7 +882,16 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) CHECK(options->boot_class_path_ != nullptr); CHECK_NE(options->boot_class_path_->size(), 0U); class_linker_->InitWithoutImage(*options->boot_class_path_); + options_class_path_used = true; } + + if (!options_class_path_used) { + // If the class linker does not take ownership of the boot class path, wipe it to prevent leaks. + auto boot_class_path_vector_ptr = + const_cast<std::vector<const DexFile*>*>(options->boot_class_path_); + STLDeleteElements(boot_class_path_vector_ptr); + } + CHECK(class_linker_ != nullptr); // Initialize the special sentinel_ value early. diff --git a/runtime/runtime.h b/runtime/runtime.h index 39fd910893..e31996338d 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -133,6 +133,10 @@ class Runtime { return compiler_options_; } + void AddCompilerOption(std::string option) { + compiler_options_.push_back(option); + } + const std::vector<std::string>& GetImageCompilerOptions() const { return image_compiler_options_; } diff --git a/runtime/thread.cc b/runtime/thread.cc index 4a7103b141..d2d5be7c1e 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -515,12 +515,6 @@ bool Thread::InitStackHwm() { size_t read_guard_size; GetThreadStack(tlsPtr_.pthread_self, &read_stack_base, &read_stack_size, &read_guard_size); - // This is included in the SIGQUIT output, but it's useful here for thread debugging. - VLOG(threads) << StringPrintf("Native stack is at %p (%s with %s guard)", - read_stack_base, - PrettySize(read_stack_size).c_str(), - PrettySize(read_guard_size).c_str()); - tlsPtr_.stack_begin = reinterpret_cast<uint8_t*>(read_stack_base); tlsPtr_.stack_size = read_stack_size; @@ -537,6 +531,12 @@ bool Thread::InitStackHwm() { return false; } + // This is included in the SIGQUIT output, but it's useful here for thread debugging. + VLOG(threads) << StringPrintf("Native stack is at %p (%s with %s guard)", + read_stack_base, + PrettySize(read_stack_size).c_str(), + PrettySize(read_guard_size).c_str()); + // Set stack_end_ to the bottom of the stack saving space of stack overflows Runtime* runtime = Runtime::Current(); @@ -932,7 +932,10 @@ struct StackDumpVisitor : public StackVisitor { os << StringPrintf("<@addr=0x%" PRIxPTR "> (a %s)", reinterpret_cast<intptr_t>(o), PrettyTypeOf(o).c_str()); } else { - os << StringPrintf("<0x%08x> (a %s)", o->IdentityHashCode(), PrettyTypeOf(o).c_str()); + // IdentityHashCode can cause thread suspension, which would invalidate o if it moved. So + // we get the pretty type beofre we call IdentityHashCode. 
+ const std::string pretty_type(PrettyTypeOf(o)); + os << StringPrintf("<0x%08x> (a %s)", o->IdentityHashCode(), pretty_type.c_str()); } } os << "\n"; @@ -1339,7 +1342,6 @@ void Thread::HandleScopeVisitRoots(RootCallback* visitor, void* arg, uint32_t th } mirror::Object* Thread::DecodeJObject(jobject obj) const { - Locks::mutator_lock_->AssertSharedHeld(this); if (obj == nullptr) { return nullptr; } diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index 968e89d1da..9707c7ba7a 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -101,9 +101,12 @@ void ThreadList::DumpNativeStacks(std::ostream& os) { void ThreadList::DumpForSigQuit(std::ostream& os) { { ScopedObjectAccess soa(Thread::Current()); - Histogram<uint64_t>::CumulativeData data; - suspend_all_historam_.CreateHistogram(&data); - suspend_all_historam_.PrintConfidenceIntervals(os, 0.99, data); // Dump time to suspend. + // Only print if we have samples. + if (suspend_all_historam_.SampleSize() > 0) { + Histogram<uint64_t>::CumulativeData data; + suspend_all_historam_.CreateHistogram(&data); + suspend_all_historam_.PrintConfidenceIntervals(os, 0.99, data); // Dump time to suspend. + } } Dump(os); DumpUnattachedThreads(os); diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h index 8c080673f9..79b57afedd 100644 --- a/runtime/thread_pool.h +++ b/runtime/thread_pool.h @@ -36,10 +36,18 @@ class Closure { class Task : public Closure { public: - // Called when references reaches 0. + // Called after Closure::Run has been called. virtual void Finalize() { } }; +class SelfDeletingTask : public Task { + public: + virtual ~SelfDeletingTask() { } + virtual void Finalize() { + delete this; + } +}; + class ThreadPoolWorker { public: static const size_t kDefaultStackSize = 1 * MB; diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index 81172cb83a..88944d79bc 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -103,6 +103,14 @@ ALWAYS_INLINE static inline bool FailOrAbort(MethodVerifier* verifier, bool cond return false; } +static void SafelyMarkAllRegistersAsConflicts(MethodVerifier* verifier, RegisterLine* reg_line) { + if (verifier->IsConstructor()) { + // Before we mark all regs as conflicts, check that we don't have an uninitialized this. + reg_line->CheckConstructorReturn(verifier); + } + reg_line->MarkAllRegistersAsConflicts(verifier); +} + MethodVerifier::FailureKind MethodVerifier::VerifyClass(Thread* self, mirror::Class* klass, bool allow_soft_failures, @@ -278,7 +286,7 @@ MethodVerifier::FailureKind MethodVerifier::VerifyMethod(Thread* self, uint32_t MethodVerifier verifier(self, dex_file, dex_cache, class_loader, class_def, code_item, method_idx, method, method_access_flags, true, allow_soft_failures, - need_precise_constants); + need_precise_constants, true); if (verifier.Verify()) { // Verification completed, however failures may be pending that didn't cause the verification // to hard fail. 
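The thread_pool.h hunk above corrects the Task::Finalize comment (it runs after Closure::Run, not when a reference count reaches zero) and introduces SelfDeletingTask, whose Finalize deletes the task itself; the new heap TaskProcessor natives in dalvik_system_VMRuntime.cc build on the same Task contract. A standalone sketch of that contract, with stand-in classes mirroring the header (Run takes void* here instead of Thread* to stay self-contained, and PrintTask/Execute are illustrative names):

    #include <cstdio>

    class Closure {
     public:
      virtual ~Closure() {}
      virtual void Run(void* self) = 0;  // ART passes a Thread*; void* keeps this standalone.
    };

    class Task : public Closure {
     public:
      // Called after Closure::Run has been called.
      virtual void Finalize() {}
    };

    class SelfDeletingTask : public Task {
     public:
      virtual ~SelfDeletingTask() {}
      virtual void Finalize() { delete this; }
    };

    class PrintTask : public SelfDeletingTask {
     public:
      void Run(void* /* self */) override { std::printf("task ran\n"); }
    };

    // The executor never owns the task: Finalize() is the task's hook to release
    // itself, which for SelfDeletingTask is simply 'delete this'.
    static void Execute(Task* task) {
      task->Run(nullptr);
      task->Finalize();
    }

    int main() {
      Execute(new PrintTask());  // Heap-allocated; reclaimed inside Finalize().
      return 0;
    }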
@@ -344,7 +352,8 @@ MethodVerifier::MethodVerifier(Thread* self, const DexFile::CodeItem* code_item, uint32_t dex_method_idx, Handle<mirror::ArtMethod> method, uint32_t method_access_flags, bool can_load_classes, bool allow_soft_failures, - bool need_precise_constants, bool verify_to_dump) + bool need_precise_constants, bool verify_to_dump, + bool allow_thread_suspension) : self_(self), reg_types_(can_load_classes), work_insn_idx_(-1), @@ -369,7 +378,8 @@ MethodVerifier::MethodVerifier(Thread* self, need_precise_constants_(need_precise_constants), has_check_casts_(false), has_virtual_or_interface_invokes_(false), - verify_to_dump_(verify_to_dump) { + verify_to_dump_(verify_to_dump), + allow_thread_suspension_(allow_thread_suspension) { Runtime::Current()->AddMethodVerifier(this); DCHECK(class_def != nullptr); } @@ -388,7 +398,7 @@ void MethodVerifier::FindLocksAtDexPc(mirror::ArtMethod* m, uint32_t dex_pc, Handle<mirror::ArtMethod> method(hs.NewHandle(m)); MethodVerifier verifier(self, m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(), m->GetCodeItem(), m->GetDexMethodIndex(), method, m->GetAccessFlags(), - false, true, false); + false, true, false, false); verifier.interesting_dex_pc_ = dex_pc; verifier.monitor_enter_dex_pcs_ = monitor_enter_dex_pcs; verifier.FindLocksAtDexPc(); @@ -435,7 +445,7 @@ mirror::ArtField* MethodVerifier::FindAccessedFieldAtDexPc(mirror::ArtMethod* m, Handle<mirror::ArtMethod> method(hs.NewHandle(m)); MethodVerifier verifier(self, m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(), m->GetCodeItem(), m->GetDexMethodIndex(), method, m->GetAccessFlags(), - true, true, false); + true, true, false, true); return verifier.FindAccessedFieldAtDexPc(dex_pc); } @@ -467,7 +477,7 @@ mirror::ArtMethod* MethodVerifier::FindInvokedMethodAtDexPc(mirror::ArtMethod* m Handle<mirror::ArtMethod> method(hs.NewHandle(m)); MethodVerifier verifier(self, m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(), m->GetCodeItem(), m->GetDexMethodIndex(), method, m->GetAccessFlags(), - true, true, false); + true, true, false, true); return verifier.FindInvokedMethodAtDexPc(dex_pc); } @@ -1394,7 +1404,9 @@ bool MethodVerifier::CodeFlowVerifyMethod() { /* Continue until no instructions are marked "changed". */ while (true) { - self_->AllowThreadSuspension(); + if (allow_thread_suspension_) { + self_->AllowThreadSuspension(); + } // Find the first marked one. Use "start_guess" as a way to find one quickly. uint32_t insn_idx = start_guess; for (; insn_idx < insns_size; insn_idx++) { @@ -1559,6 +1571,16 @@ bool MethodVerifier::CodeFlowVerifyInstruction(uint32_t* start_guess) { std::unique_ptr<RegisterLine> branch_line; std::unique_ptr<RegisterLine> fallthrough_line; + /* + * If we are in a constructor, and we currently have an UninitializedThis type + * in a register somewhere, we need to make sure it isn't overwritten. + */ + bool track_uninitialized_this = false; + size_t uninitialized_this_loc = 0; + if (IsConstructor()) { + track_uninitialized_this = work_line_->GetUninitializedThisLoc(this, &uninitialized_this_loc); + } + switch (inst->Opcode()) { case Instruction::NOP: /* @@ -2769,6 +2791,20 @@ bool MethodVerifier::CodeFlowVerifyInstruction(uint32_t* start_guess) { */ } // end - switch (dec_insn.opcode) + /* + * If we are in a constructor, and we had an UninitializedThis type + * in a register somewhere, we need to make sure it wasn't overwritten. 
+ */ + if (track_uninitialized_this) { + bool was_invoke_direct = (inst->Opcode() == Instruction::INVOKE_DIRECT || + inst->Opcode() == Instruction::INVOKE_DIRECT_RANGE); + if (work_line_->WasUninitializedThisOverwritten(this, uninitialized_this_loc, + was_invoke_direct)) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) + << "Constructor failed to initialize this object"; + } + } + if (have_pending_hard_failure_) { if (Runtime::Current()->IsCompiler()) { /* When compiling, check that the last failure is a hard failure */ @@ -2950,7 +2986,7 @@ bool MethodVerifier::CodeFlowVerifyInstruction(uint32_t* start_guess) { const Instruction* ret_inst = Instruction::At(code_item_->insns_ + next_insn_idx); Instruction::Code opcode = ret_inst->Opcode(); if ((opcode == Instruction::RETURN_VOID) || (opcode == Instruction::RETURN_VOID_BARRIER)) { - work_line_->MarkAllRegistersAsConflicts(this); + SafelyMarkAllRegistersAsConflicts(this, work_line_.get()); } else { if (opcode == Instruction::RETURN_WIDE) { work_line_->MarkAllRegistersAsConflictsExceptWide(this, ret_inst->VRegA_11x()); @@ -4105,7 +4141,7 @@ bool MethodVerifier::UpdateRegisters(uint32_t next_insn, RegisterLine* merge_lin const Instruction* ret_inst = Instruction::At(code_item_->insns_ + next_insn); Instruction::Code opcode = ret_inst->Opcode(); if ((opcode == Instruction::RETURN_VOID) || (opcode == Instruction::RETURN_VOID_BARRIER)) { - target_line->MarkAllRegistersAsConflicts(this); + SafelyMarkAllRegistersAsConflicts(this, target_line); } else { target_line->CopyFromLine(merge_line); if (opcode == Instruction::RETURN_WIDE) { diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h index c3bd4af21a..b83e647ada 100644 --- a/runtime/verifier/method_verifier.h +++ b/runtime/verifier/method_verifier.h @@ -207,10 +207,11 @@ class MethodVerifier { const DexFile::CodeItem* code_item, uint32_t method_idx, Handle<mirror::ArtMethod> method, uint32_t access_flags, bool can_load_classes, bool allow_soft_failures, - bool need_precise_constants) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + bool need_precise_constants, bool allow_thread_suspension) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) : MethodVerifier(self, dex_file, dex_cache, class_loader, class_def, code_item, method_idx, method, access_flags, can_load_classes, allow_soft_failures, - need_precise_constants, false) {} + need_precise_constants, false, allow_thread_suspension) {} ~MethodVerifier(); @@ -243,6 +244,16 @@ class MethodVerifier { bool is_range) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // Is the method being verified a constructor? + bool IsConstructor() const { + return (method_access_flags_ & kAccConstructor) != 0; + } + + // Is the method verified static? + bool IsStatic() const { + return (method_access_flags_ & kAccStatic) != 0; + } + private: // Private constructor for dumping. MethodVerifier(Thread* self, const DexFile* dex_file, Handle<mirror::DexCache> dex_cache, @@ -250,7 +261,7 @@ class MethodVerifier { const DexFile::CodeItem* code_item, uint32_t method_idx, Handle<mirror::ArtMethod> method, uint32_t access_flags, bool can_load_classes, bool allow_soft_failures, bool need_precise_constants, - bool verify_to_dump) + bool verify_to_dump, bool allow_thread_suspension) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Adds the given string to the beginning of the last failure message. 
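The verifier hunks above make constructors track the UninitializedThis value: its register is recorded before each instruction, and afterwards the verifier fails hard if it was overwritten by anything other than the invoke-direct that initializes it, using the RegisterLine helpers added a little further down (GetUninitializedThisLoc, WasUninitializedThisOverwritten); the b/18800943 smali tests added below exercise exactly this. A simplified sketch of the check, assuming a flat register-kind model instead of the real RegType lattice; VRegKind and the helper names are illustrative:

    #include <cstddef>
    #include <vector>

    enum class VRegKind { kUninitializedThis, kReference, kConflict, kOther };

    // Counterpart of RegisterLine::GetUninitializedThisLoc: find the register
    // that still holds the constructor's uninitialized 'this', if any.
    static bool FindUninitializedThis(const std::vector<VRegKind>& regs, size_t* loc) {
      for (size_t i = 0; i < regs.size(); ++i) {
        if (regs[i] == VRegKind::kUninitializedThis) {
          *loc = i;
          return true;
        }
      }
      return false;
    }

    // Counterpart of WasUninitializedThisOverwritten: after one instruction,
    // 'this' must still be uninitialized at this_loc, have just been initialized
    // by an invoke-direct (constructor call), or still be tracked in some other
    // register; otherwise the method fails verification.
    static bool UninitializedThisWasLost(const std::vector<VRegKind>& regs,
                                         size_t this_loc,
                                         bool was_invoke_direct) {
      if (regs[this_loc] == VRegKind::kUninitializedThis) {
        return false;  // Still there.
      }
      if (regs[this_loc] == VRegKind::kReference && was_invoke_direct) {
        return false;  // Just initialized by a constructor call.
      }
      for (VRegKind kind : regs) {
        if (kind == VRegKind::kUninitializedThis) {
          return false;  // Copied elsewhere, still tracked.
        }
      }
      return true;  // Overwritten before the superclass constructor ran.
    }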
@@ -625,16 +636,6 @@ class MethodVerifier { bool UpdateRegisters(uint32_t next_insn, RegisterLine* merge_line, bool update_merge_line) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - // Is the method being verified a constructor? - bool IsConstructor() const { - return (method_access_flags_ & kAccConstructor) != 0; - } - - // Is the method verified static? - bool IsStatic() const { - return (method_access_flags_ & kAccStatic) != 0; - } - // Return the register type for the method. const RegType& GetMethodReturnType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -729,6 +730,11 @@ class MethodVerifier { // VerifyMethodAndDump. const bool verify_to_dump_; + // Whether or not we call AllowThreadSuspension periodically, we want a way to disable this for + // thread dumping checkpoints since we may get thread suspension at an inopportune time due to + // FindLocksAtDexPC, resulting in deadlocks. + const bool allow_thread_suspension_; + DISALLOW_COPY_AND_ASSIGN(MethodVerifier); }; std::ostream& operator<<(std::ostream& os, const MethodVerifier::FailureKind& rhs); diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc index 72d793897c..3b098718db 100644 --- a/runtime/verifier/register_line.cc +++ b/runtime/verifier/register_line.cc @@ -25,6 +25,49 @@ namespace art { namespace verifier { +bool RegisterLine::WasUninitializedThisOverwritten(MethodVerifier* verifier, + size_t this_loc, + bool was_invoke_direct) const { + DCHECK(verifier->IsConstructor()); + + // Is the UnintializedThis type still there? + if (GetRegisterType(verifier, this_loc).IsUninitializedThisReference() || + GetRegisterType(verifier, this_loc).IsUnresolvedAndUninitializedThisReference()) { + return false; + } + + // If there is an initialized reference here now, did we just perform an invoke-direct? Note that + // this is the correct approach for dex bytecode: results of invoke-direct are stored in the + // result register. Overwriting "this_loc" can only be done by a constructor call. + if (GetRegisterType(verifier, this_loc).IsReferenceTypes() && was_invoke_direct) { + return false; + // Otherwise we could have just copied a different initialized reference to this location. + } + + // The UnintializedThis in the register is gone, so check to see if it's somewhere else now. + for (size_t i = 0; i < num_regs_; i++) { + if (GetRegisterType(verifier, i).IsUninitializedThisReference() || + GetRegisterType(verifier, i).IsUnresolvedAndUninitializedThisReference()) { + // We found it somewhere else... + return false; + } + } + + // The UninitializedThis is gone from the original register, and now we can't find it. 
+ return true; +} + +bool RegisterLine::GetUninitializedThisLoc(MethodVerifier* verifier, size_t* vreg) const { + for (size_t i = 0; i < num_regs_; i++) { + if (GetRegisterType(verifier, i).IsUninitializedThisReference() || + GetRegisterType(verifier, i).IsUnresolvedAndUninitializedThisReference()) { + *vreg = i; + return true; + } + } + return false; +} + bool RegisterLine::CheckConstructorReturn(MethodVerifier* verifier) const { for (size_t i = 0; i < num_regs_; i++) { if (GetRegisterType(verifier, i).IsUninitializedThisReference() || diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h index 52b5c13888..ca61a0b8f0 100644 --- a/runtime/verifier/register_line.h +++ b/runtime/verifier/register_line.h @@ -157,6 +157,18 @@ class RegisterLine { */ bool CheckConstructorReturn(MethodVerifier* verifier) const; + /* + * Check if an UninitializedThis at the specified location has been overwritten before + * being correctly initialized. + */ + bool WasUninitializedThisOverwritten(MethodVerifier* verifier, size_t this_loc, + bool was_invoke_direct) const; + + /* + * Get the first location of an UninitializedThis type, or return kInvalidVreg if there are none. + */ + bool GetUninitializedThisLoc(MethodVerifier* verifier, size_t* vreg) const; + // Compare two register lines. Returns 0 if they match. // Using this for a sort is unwise, since the value can change based on machine endianness. int CompareLine(const RegisterLine* line2) const { diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc index 601e3210a9..2eb518c531 100644 --- a/sigchainlib/sigchain.cc +++ b/sigchainlib/sigchain.cc @@ -170,12 +170,13 @@ extern "C" int sigaction(int signal, const struct sigaction* new_action, struct // Note that we check that the signal number is in range here. An out of range signal // number should behave exactly as the libc sigaction. if (signal > 0 && signal < _NSIG && user_sigactions[signal].IsClaimed()) { - if (old_action != NULL) { - *old_action = user_sigactions[signal].GetAction(); - } + struct sigaction saved_action = user_sigactions[signal].GetAction(); if (new_action != NULL) { user_sigactions[signal].SetAction(*new_action, false); } + if (old_action != NULL) { + *old_action = saved_action; + } return 0; } diff --git a/test/004-SignalTest/src/Main.java b/test/004-SignalTest/src/Main.java index 0391592543..8b1f49bacb 100644 --- a/test/004-SignalTest/src/Main.java +++ b/test/004-SignalTest/src/Main.java @@ -20,7 +20,7 @@ public class Main { private static native int testSignal(); private static void stackOverflow() { - stackOverflow(); + stackOverflow(); } public static void main(String[] args) { @@ -40,7 +40,6 @@ public class Main { } try { stackOverflow(); - // Should never get here. 
throw new AssertionError(); } catch (StackOverflowError e) { diff --git a/test/436-rem-float/src/Main.java b/test/436-rem-float/src/Main.java index e20c21f9cc..cc6341a527 100644 --- a/test/436-rem-float/src/Main.java +++ b/test/436-rem-float/src/Main.java @@ -22,13 +22,44 @@ public class Main { } private static void remFloat() { - expectApproxEquals(2F, $opt$RemConst(6F)); + expectApproxEquals(1.98F, $opt$Rem(1.98F, 2F)); + expectApproxEquals(0F, $opt$Rem(2F, 0.5F)); + expectApproxEquals(0.09999F, $opt$Rem(1.0F, 0.1F)); + expectApproxEquals(1.9F, $opt$Rem(6.5F, 2.3F)); + expectApproxEquals(0.48F, $opt$Rem(1.98F, 1.5F)); + expectApproxEquals(0.9999F, $opt$Rem(0.9999F, 1.222F)); + expectApproxEquals(0.9999F, $opt$Rem(0.9999F, 1.0001F)); + expectApproxEquals(-1.98F, $opt$Rem(-1.98F, 2F)); + expectApproxEquals(-0F, $opt$Rem(-2F, 0.5F)); + expectApproxEquals(-0.09999F, $opt$Rem(-1.0F, 0.1F)); + expectApproxEquals(-1.9F, $opt$Rem(-6.5F, 2.3F)); + expectApproxEquals(-0.48F, $opt$Rem(-1.98F, 1.5F)); + expectApproxEquals(-0.9999F, $opt$Rem(-0.9999F, 1.222F)); + expectApproxEquals(-0.9999F, $opt$Rem(-0.9999F, 1.0001F)); + expectApproxEquals(1.98F, $opt$Rem(1.98F, -2F)); + expectApproxEquals(0F, $opt$Rem(2F, -0.5F)); + expectApproxEquals(0.09999F, $opt$Rem(1.0F, -0.1F)); + expectApproxEquals(1.9F, $opt$Rem(6.5F, -2.3F)); + expectApproxEquals(0.48F, $opt$Rem(1.98F, -1.5F)); + expectApproxEquals(0.9999F, $opt$Rem(0.9999F, -1.222F)); + expectApproxEquals(0.9999F, $opt$Rem(0.9999F, -1.0001F)); + expectApproxEquals(-1.98F, $opt$Rem(-1.98F, -2F)); + expectApproxEquals(-0F, $opt$Rem(-2F, -0.5F)); + expectApproxEquals(-0.09999F, $opt$Rem(-1.0F, -0.1F)); + expectApproxEquals(-1.9F, $opt$Rem(-6.5F, -2.3F)); + expectApproxEquals(-0.48F, $opt$Rem(-1.98F, -1.5F)); + expectApproxEquals(-0.9999F, $opt$Rem(-0.9999F, -1.222F)); + expectApproxEquals(-0.9999F, $opt$Rem(-0.9999F, -1.0001F)); + + expectApproxEquals(1.68267e-18F, $opt$Rem(61615.2F, -2.48699e-17F)); + expectApproxEquals(-8.63819e-09F, $opt$Rem(-1.73479e+14F, 3.11154e-08F)); + expectApproxEquals(1.10911e-12F, $opt$Rem(338122F, 4.57572e-12F)); + expectApproxEquals(2F, $opt$RemConst(6F)); expectApproxEquals(2F, $opt$Rem(5.1F, 3.1F)); expectApproxEquals(2.1F, $opt$Rem(5.1F, 3F)); expectApproxEquals(-2F, $opt$Rem(-5.1F, 3.1F)); expectApproxEquals(-2.1F, $opt$Rem(-5.1F, -3F)); - expectApproxEquals(2F, $opt$Rem(6F, 4F)); expectApproxEquals(2F, $opt$Rem(6F, -4F)); expectApproxEquals(0F, $opt$Rem(6F, 3F)); @@ -43,34 +74,87 @@ public class Main { expectApproxEquals(7F, $opt$Rem(7F, -9F)); expectApproxEquals(-7F, $opt$Rem(-7F, 9F)); expectApproxEquals(-7F, $opt$Rem(-7F, -9F)); - expectApproxEquals(0F, $opt$Rem(Float.MAX_VALUE, 1F)); expectApproxEquals(0F, $opt$Rem(Float.MAX_VALUE, -1F)); expectApproxEquals(0F, $opt$Rem(Float.MIN_VALUE, 1F)); expectApproxEquals(0F, $opt$Rem(Float.MIN_VALUE, -1F)); - expectApproxEquals(0F, $opt$Rem(0F, 7F)); expectApproxEquals(0F, $opt$Rem(0F, Float.MAX_VALUE)); expectApproxEquals(0F, $opt$Rem(0F, Float.MIN_VALUE)); + expectApproxEquals(0F, $opt$Rem(0F, Float.POSITIVE_INFINITY)); + expectApproxEquals(0F, $opt$Rem(0F, Float.NEGATIVE_INFINITY)); + expectApproxEquals(4F, $opt$Rem(4F, Float.POSITIVE_INFINITY)); + expectApproxEquals(4F, $opt$Rem(4F, Float.NEGATIVE_INFINITY)); + expectApproxEquals(-4F, $opt$Rem(-4F, Float.POSITIVE_INFINITY)); + expectApproxEquals(-4F, $opt$Rem(-4F, Float.NEGATIVE_INFINITY)); + expectApproxEquals(0F, $opt$Rem(Float.MIN_NORMAL, Float.MIN_VALUE)); + expectApproxEquals(0F, $opt$Rem(Float.MIN_NORMAL, 
Float.MIN_NORMAL)); + expectApproxEquals(0F, $opt$Rem(Float.MIN_VALUE, Float.MIN_VALUE)); + expectApproxEquals(0F, $opt$Rem(Float.MAX_VALUE, Float.MIN_VALUE)); + expectApproxEquals(0F, $opt$Rem(Float.MAX_VALUE, Float.MAX_VALUE)); + expectApproxEquals(0F, $opt$Rem(Float.MAX_VALUE, Float.MIN_NORMAL)); + expectApproxEquals(Float.MIN_NORMAL, $opt$Rem(Float.MIN_NORMAL, Float.MAX_VALUE)); + expectApproxEquals(Float.MIN_NORMAL, $opt$Rem(Float.MIN_NORMAL, Float.NEGATIVE_INFINITY)); + expectApproxEquals(Float.MIN_NORMAL, $opt$Rem(Float.MIN_NORMAL, Float.POSITIVE_INFINITY)); + expectApproxEquals(Float.MIN_VALUE, $opt$Rem(Float.MIN_VALUE, Float.MAX_VALUE)); + expectApproxEquals(Float.MIN_VALUE, $opt$Rem(Float.MIN_VALUE, Float.MIN_NORMAL)); + expectApproxEquals(Float.MIN_VALUE, $opt$Rem(Float.MIN_VALUE, Float.NEGATIVE_INFINITY)); + expectApproxEquals(Float.MIN_VALUE, $opt$Rem(Float.MIN_VALUE, Float.POSITIVE_INFINITY)); + expectApproxEquals(Float.MAX_VALUE, $opt$Rem(Float.MAX_VALUE, Float.NEGATIVE_INFINITY)); + expectApproxEquals(Float.MAX_VALUE, $opt$Rem(Float.MAX_VALUE, Float.POSITIVE_INFINITY)); expectNaN($opt$Rem(Float.NaN, 3F)); expectNaN($opt$Rem(3F, Float.NaN)); - expectNaN($opt$Rem(Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY)); - expectNaN($opt$Rem(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY)); expectNaN($opt$Rem(3F, 0F)); - - expectApproxEquals(4F, $opt$Rem(4F, Float.POSITIVE_INFINITY)); - expectApproxEquals(4F, $opt$Rem(4F, Float.NEGATIVE_INFINITY)); + expectNaN($opt$Rem(1F, 0F)); + expectNaN($opt$Rem(-1F, 0F)); + expectNaN($opt$Rem(Float.NEGATIVE_INFINITY, Float.MIN_VALUE)); + expectNaN($opt$Rem(Float.NEGATIVE_INFINITY, Float.MAX_VALUE)); + expectNaN($opt$Rem(Float.NEGATIVE_INFINITY, Float.MIN_NORMAL)); + expectNaN($opt$Rem(Float.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY)); + expectNaN($opt$Rem(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY)); + expectNaN($opt$Rem(Float.POSITIVE_INFINITY, Float.MIN_VALUE)); + expectNaN($opt$Rem(Float.POSITIVE_INFINITY, Float.MAX_VALUE)); + expectNaN($opt$Rem(Float.POSITIVE_INFINITY, Float.MIN_NORMAL)); + expectNaN($opt$Rem(Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY)); + expectNaN($opt$Rem(Float.POSITIVE_INFINITY, Float.POSITIVE_INFINITY)); } private static void remDouble() { - expectApproxEquals(2D, $opt$RemConst(6D)); + expectApproxEquals(1.98D, $opt$Rem(1.98D, 2D)); + expectApproxEquals(0D, $opt$Rem(2D, 0.5D)); + expectApproxEquals(0.09999D, $opt$Rem(1.0D, 0.1D)); + expectApproxEquals(1.9D, $opt$Rem(6.5D, 2.3D)); + expectApproxEquals(0.48D, $opt$Rem(1.98D, 1.5D)); + expectApproxEquals(0.9999D, $opt$Rem(0.9999D, 1.222D)); + expectApproxEquals(0.9999D, $opt$Rem(0.9999D, 1.0001D)); + expectApproxEquals(-1.98D, $opt$Rem(-1.98D, 2D)); + expectApproxEquals(-0D, $opt$Rem(-2D, 0.5D)); + expectApproxEquals(-0.09999D, $opt$Rem(-1.0D, 0.1D)); + expectApproxEquals(-1.9D, $opt$Rem(-6.5D, 2.3D)); + expectApproxEquals(-0.48D, $opt$Rem(-1.98D, 1.5D)); + expectApproxEquals(-0.9999D, $opt$Rem(-0.9999D, 1.222D)); + expectApproxEquals(-0.9999D, $opt$Rem(-0.9999D, 1.0001D)); + expectApproxEquals(1.98D, $opt$Rem(1.98D, -2D)); + expectApproxEquals(0D, $opt$Rem(2D, -0.5D)); + expectApproxEquals(0.09999D, $opt$Rem(1.0D, -0.1D)); + expectApproxEquals(1.9D, $opt$Rem(6.5D, -2.3D)); + expectApproxEquals(0.48D, $opt$Rem(1.98D, -1.5D)); + expectApproxEquals(0.9999D, $opt$Rem(0.9999D, -1.222D)); + expectApproxEquals(0.9999D, $opt$Rem(0.9999D, -1.0001D)); + expectApproxEquals(-1.98D, $opt$Rem(-1.98D, -2D)); + expectApproxEquals(-0D, $opt$Rem(-2D, -0.5D)); + 
expectApproxEquals(-0.09999D, $opt$Rem(-1.0D, -0.1D)); + expectApproxEquals(-1.9D, $opt$Rem(-6.5D, -2.3D)); + expectApproxEquals(-0.48D, $opt$Rem(-1.98D, -1.5D)); + expectApproxEquals(-0.9999D, $opt$Rem(-0.9999D, -1.222D)); + expectApproxEquals(-0.9999D, $opt$Rem(-0.9999D, -1.0001D)); + expectApproxEquals(2D, $opt$RemConst(6D)); expectApproxEquals(2D, $opt$Rem(5.1D, 3.1D)); expectApproxEquals(2.1D, $opt$Rem(5.1D, 3D)); expectApproxEquals(-2D, $opt$Rem(-5.1D, 3.1D)); expectApproxEquals(-2.1D, $opt$Rem(-5.1D, -3D)); - expectApproxEquals(2D, $opt$Rem(6D, 4D)); expectApproxEquals(2D, $opt$Rem(6D, -4D)); expectApproxEquals(0D, $opt$Rem(6D, 3D)); @@ -85,24 +169,50 @@ public class Main { expectApproxEquals(7D, $opt$Rem(7D, -9D)); expectApproxEquals(-7D, $opt$Rem(-7D, 9D)); expectApproxEquals(-7D, $opt$Rem(-7D, -9D)); - expectApproxEquals(0D, $opt$Rem(Double.MAX_VALUE, 1D)); expectApproxEquals(0D, $opt$Rem(Double.MAX_VALUE, -1D)); expectApproxEquals(0D, $opt$Rem(Double.MIN_VALUE, 1D)); expectApproxEquals(0D, $opt$Rem(Double.MIN_VALUE, -1D)); - expectApproxEquals(0D, $opt$Rem(0D, 7D)); expectApproxEquals(0D, $opt$Rem(0D, Double.MAX_VALUE)); expectApproxEquals(0D, $opt$Rem(0D, Double.MIN_VALUE)); + expectApproxEquals(0D, $opt$Rem(0D, Double.POSITIVE_INFINITY)); + expectApproxEquals(0D, $opt$Rem(0D, Double.NEGATIVE_INFINITY)); + expectApproxEquals(4D, $opt$Rem(4D, Double.POSITIVE_INFINITY)); + expectApproxEquals(4D, $opt$Rem(4D, Double.NEGATIVE_INFINITY)); + expectApproxEquals(-4D, $opt$Rem(-4D, Double.POSITIVE_INFINITY)); + expectApproxEquals(-4D, $opt$Rem(-4D, Double.NEGATIVE_INFINITY)); + expectApproxEquals(0D, $opt$Rem(Double.MIN_NORMAL, Double.MIN_VALUE)); + expectApproxEquals(0D, $opt$Rem(Double.MIN_NORMAL, Double.MIN_NORMAL)); + expectApproxEquals(0D, $opt$Rem(Double.MIN_VALUE, Double.MIN_VALUE)); + expectApproxEquals(0D, $opt$Rem(Double.MAX_VALUE, Double.MIN_VALUE)); + expectApproxEquals(0D, $opt$Rem(Double.MAX_VALUE, Double.MAX_VALUE)); + expectApproxEquals(0D, $opt$Rem(Double.MAX_VALUE, Double.MIN_NORMAL)); + expectApproxEquals(Double.MIN_NORMAL, $opt$Rem(Double.MIN_NORMAL, Double.MAX_VALUE)); + expectApproxEquals(Double.MIN_NORMAL, $opt$Rem(Double.MIN_NORMAL, Double.NEGATIVE_INFINITY)); + expectApproxEquals(Double.MIN_NORMAL, $opt$Rem(Double.MIN_NORMAL, Double.POSITIVE_INFINITY)); + expectApproxEquals(Double.MIN_VALUE, $opt$Rem(Double.MIN_VALUE, Double.MAX_VALUE)); + expectApproxEquals(Double.MIN_VALUE, $opt$Rem(Double.MIN_VALUE, Double.MIN_NORMAL)); + expectApproxEquals(Double.MIN_VALUE, $opt$Rem(Double.MIN_VALUE, Double.NEGATIVE_INFINITY)); + expectApproxEquals(Double.MIN_VALUE, $opt$Rem(Double.MIN_VALUE, Double.POSITIVE_INFINITY)); + expectApproxEquals(Double.MAX_VALUE, $opt$Rem(Double.MAX_VALUE, Double.NEGATIVE_INFINITY)); + expectApproxEquals(Double.MAX_VALUE, $opt$Rem(Double.MAX_VALUE, Double.POSITIVE_INFINITY)); expectNaN($opt$Rem(Double.NaN, 3D)); expectNaN($opt$Rem(3D, Double.NaN)); - expectNaN($opt$Rem(Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY)); - expectNaN($opt$Rem(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY)); expectNaN($opt$Rem(3D, 0D)); - - expectApproxEquals(4D, $opt$Rem(4D, Double.POSITIVE_INFINITY)); - expectApproxEquals(4D, $opt$Rem(4D, Double.NEGATIVE_INFINITY)); + expectNaN($opt$Rem(1D, 0D)); + expectNaN($opt$Rem(-1D, 0D)); + expectNaN($opt$Rem(Double.NEGATIVE_INFINITY, Double.MIN_VALUE)); + expectNaN($opt$Rem(Double.NEGATIVE_INFINITY, Double.MAX_VALUE)); + expectNaN($opt$Rem(Double.NEGATIVE_INFINITY, Double.MIN_NORMAL)); + 
expectNaN($opt$Rem(Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY)); + expectNaN($opt$Rem(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY)); + expectNaN($opt$Rem(Double.POSITIVE_INFINITY, Double.MIN_VALUE)); + expectNaN($opt$Rem(Double.POSITIVE_INFINITY, Double.MAX_VALUE)); + expectNaN($opt$Rem(Double.POSITIVE_INFINITY, Double.MIN_NORMAL)); + expectNaN($opt$Rem(Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY)); + expectNaN($opt$Rem(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY)); } static float $opt$Rem(float a, float b) { diff --git a/test/437-inline/src/Main.java b/test/437-inline/src/Main.java index ccddab757e..daabe4e4f8 100644 --- a/test/437-inline/src/Main.java +++ b/test/437-inline/src/Main.java @@ -46,6 +46,23 @@ public class Main { if ($opt$inline$returnSub(42, 1) != 41) { throw new Error(); } + + // Some architectures used to not be able to allocate registers with + // floating point operations. This call is a regression test that we don't + // try inlining methods with floats in it on such architectures. The + // compiler used to crash after inlining a method it cannot allocate + // registers for. + tryInlineFloat(); + } + + public static int tryInlineFloat() { + return useFloatMethod(); + } + + public static float staticFloat = 42.0f; + + public static int useFloatMethod() { + return (int)staticFloat; } public static int $opt$inline$returnParameter(int a) { diff --git a/test/438-volatile/expected.txt b/test/438-volatile/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/438-volatile/expected.txt diff --git a/test/438-volatile/info.txt b/test/438-volatile/info.txt new file mode 100644 index 0000000000..7a4c81aa75 --- /dev/null +++ b/test/438-volatile/info.txt @@ -0,0 +1 @@ +Tests basic operations (set/get) on volatiles. diff --git a/test/438-volatile/src/Main.java b/test/438-volatile/src/Main.java new file mode 100644 index 0000000000..a870e4cc38 --- /dev/null +++ b/test/438-volatile/src/Main.java @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class Main { + static volatile long long_volatile; + static volatile double double_volatile; + + public static void main(String[] args) { + checkVolatileUpdate(0L); + checkVolatileUpdate(Long.MAX_VALUE); + checkVolatileUpdate(Long.MIN_VALUE); + + checkVolatileUpdate(0.0); + checkVolatileUpdate(Double.MAX_VALUE); + checkVolatileUpdate(-Double.MAX_VALUE); + } + + public static long $opt$update(long a) { + long_volatile = a; + return long_volatile; + } + + public static double $opt$update(double a) { + double_volatile = a; + return double_volatile; + } + + public static void checkVolatileUpdate(long value) { + if (value != $opt$update(value)) { + throw new RuntimeException("Volatile update failed for long:" + value); + } + } + + public static void checkVolatileUpdate(double value) { + if (value != $opt$update(value)) { + throw new RuntimeException("Volatile update failed for double:" + value); + } + } + +} diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt index 5f86f1e047..6cb08f483e 100644 --- a/test/800-smali/expected.txt +++ b/test/800-smali/expected.txt @@ -9,4 +9,8 @@ invoke-super abstract BadCaseInOpRegRegReg CmpLong FloatIntConstPassing +b/18718277 +b/18800943 (1) +b/18800943 (2) +MoveExc Done! diff --git a/test/800-smali/smali/b_18718277.smali b/test/800-smali/smali/b_18718277.smali new file mode 100644 index 0000000000..b14ad2081e --- /dev/null +++ b/test/800-smali/smali/b_18718277.smali @@ -0,0 +1,29 @@ +.class public LB18718277; + +.super Ljava/lang/Object; + +.method public static helper(I)I + .locals 1 + add-int/lit8 v0, p0, 2 + neg-int v0, v0 + return v0 +.end method + +.method public static getInt()I + .registers 2 + const/4 v1, 3 + invoke-static {v1}, LB18718277;->helper(I)I + move-result v0 + :outer_loop + if-eqz v1, :exit_outer_loop + const/4 v0, 0 + if-eqz v0, :skip_dead_loop + :dead_loop + add-int/2addr v0, v0 + if-gez v0, :dead_loop + :skip_dead_loop + add-int/lit8 v1, v1, -1 + goto :outer_loop + :exit_outer_loop + return v0 +.end method diff --git a/test/800-smali/smali/b_18800943_1.smali b/test/800-smali/smali/b_18800943_1.smali new file mode 100644 index 0000000000..868438e57c --- /dev/null +++ b/test/800-smali/smali/b_18800943_1.smali @@ -0,0 +1,9 @@ +.class public LB18800943_1; +.super Ljava/lang/Object; + +# This constructor should fail verification as the object is not initialized by a super-call. +.method public constructor <init>()V +.registers 1 + nop + return-void +.end method diff --git a/test/800-smali/smali/b_18800943_2.smali b/test/800-smali/smali/b_18800943_2.smali new file mode 100644 index 0000000000..6052ada77f --- /dev/null +++ b/test/800-smali/smali/b_18800943_2.smali @@ -0,0 +1,9 @@ +.class public LB18800943_2; +.super Ljava/lang/Object; + +# This constructor should fail verification as the object is not initialized by a super-call. 
+.method public constructor <init>()V +.registers 1 + const v0, 0x0 + return-void +.end method diff --git a/test/800-smali/smali/move_exc.smali b/test/800-smali/smali/move_exc.smali new file mode 100644 index 0000000000..4ade4bc728 --- /dev/null +++ b/test/800-smali/smali/move_exc.smali @@ -0,0 +1,29 @@ +.class public LMoveExc; +.super Ljava/lang/Object; + + +.method public constructor <init>()V +.registers 1 + invoke-direct {p0}, Ljava/lang/Object;-><init>()V + return-void +.end method + +.method public static run()V +.registers 6 +:Label1 + const v1, 15 + const v2, 0 + div-int v0, v1, v2 + +:Label2 + goto :Label4 + +:Label3 + move-exception v3 + throw v3 + +:Label4 + return-void + +.catchall {:Label1 .. :Label2} :Label3 +.end method diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java index a2db05135d..2eda85083f 100644 --- a/test/800-smali/src/Main.java +++ b/test/800-smali/src/Main.java @@ -65,6 +65,10 @@ public class Main { testCases.add(new TestCase("BadCaseInOpRegRegReg", "BadCaseInOpRegRegReg", "getInt", null, null, 2)); testCases.add(new TestCase("CmpLong", "CmpLong", "run", null, null, 0)); testCases.add(new TestCase("FloatIntConstPassing", "FloatIntConstPassing", "run", null, null, 2)); + testCases.add(new TestCase("b/18718277", "B18718277", "getInt", null, null, 0)); + testCases.add(new TestCase("b/18800943 (1)", "B18800943_1", "n_a", null, new VerifyError(), 0)); + testCases.add(new TestCase("b/18800943 (2)", "B18800943_2", "n_a", null, new VerifyError(), 0)); + testCases.add(new TestCase("MoveExc", "MoveExc", "run", null, new ArithmeticException(), null)); } public void runTests() { diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index e085d3f470..fd66a02f32 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -214,9 +214,7 @@ endif TEST_ART_BROKEN_NO_RELOCATE_TESTS := # Tests that are broken with GC stress. -TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \ - 004-SignalTest \ - 114-ParallelGC +TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := ifneq (,$(filter gcstress,$(GC_TYPES))) ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ @@ -297,10 +295,7 @@ endif TEST_ART_BROKEN_DEFAULT_RUN_TESTS := # Known broken tests for the arm64 optimizing compiler backend. -TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS := \ - 003-omnibus-opcodes64 \ - 012-math64 \ - 436-rem-float64 +TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS := ifneq (,$(filter optimizing,$(COMPILER_TYPES))) ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \ diff --git a/test/run-test b/test/run-test index 2abc1fa262..aba4e05d03 100755 --- a/test/run-test +++ b/test/run-test @@ -258,6 +258,9 @@ while true; do elif [ "x$1" = "x--always-clean" ]; then always_clean="yes" shift + elif [ "x$1" = "x--dex2oat-swap" ]; then + run_args="${run_args} --dex2oat-swap" + shift elif expr "x$1" : "x--" >/dev/null 2>&1; then echo "unknown $0 option: $1" 1>&2 usage="yes" @@ -452,6 +455,7 @@ if [ "$usage" = "yes" ]; then echo " --gcverify Run with gc verification" echo " --always-clean Delete the test files even if the test fails." echo " --android-root [path] The path on target for the android root. (/system by default)." + echo " --dex2oat-swap Use a dex2oat swap file." ) 1>&2 exit 1 fi diff --git a/tools/checker.py b/tools/checker.py index 82a1e6bd22..74c6d616c5 100755 --- a/tools/checker.py +++ b/tools/checker.py @@ -20,9 +20,9 @@ # against a set of assertions specified alongside the tests. 
# # Tests are written in Java, turned into DEX and compiled with the Optimizing -# compiler. "Check lines" are comments in the Java file which begin with prefix -# 'CHECK' followed by a pattern that the engine attempts to match in the -# compiler-generated output. +# compiler. "Check lines" are assertions formatted as comments of the Java file. +# They begin with prefix 'CHECK' followed by a pattern that the engine attempts +# to match in the compiler-generated output. # # Assertions are tested in groups which correspond to the individual compiler # passes. Each group of check lines therefore must start with a 'CHECK-START' @@ -30,7 +30,23 @@ # name must exactly match one of the groups recognized in the output (they can # be listed with the '--list-groups' command-line flag). # -# Check line patterns are treated as plain text rather than regular expressions +# Matching of check lines is carried out in the order of appearance in the +# source file. There are three types of check lines: +# - CHECK: Must match an output line which appears in the output group +# later than lines matched against any preceeding checks. Output +# lines must therefore match the check lines in the same order. +# These are referred to as "in-order" checks in the code. +# - CHECK-DAG: Must match an output line which appears in the output group +# later than lines matched against any preceeding in-order checks. +# In other words, the order of output lines does not matter +# between consecutive DAG checks. +# - CHECK-NOT: Must not match any output line which appear in the output group +# later than lines matched against any preceeding checks and +# earlier than lines matched against any subsequent checks. +# Surrounding non-negative checks (or boundaries of the group) +# therefore create a scope within which the assertion is verified. +# +# Check-line patterns are treated as plain text rather than regular expressions # but are whitespace agnostic. # # Actual regex patterns can be inserted enclosed in '{{' and '}}' brackets. If @@ -115,13 +131,16 @@ class CheckLine(CommonEqualityMixin): more regex elements. Matching against an output line is successful only if all regex elements can be matched in the given order.""" - def __init__(self, lineContent, lineNo=-1): - lineContent = lineContent.strip() + class Variant(object): + """Supported types of assertions.""" + InOrder, DAG, Not = range(3) + def __init__(self, content, variant=Variant.InOrder, lineNo=-1): + self.content = content.strip() + self.variant = variant self.lineNo = lineNo - self.content = lineContent - self.lineParts = self.__parse(lineContent) + self.lineParts = self.__parse(self.content) if not self.lineParts: raise Exception("Empty check line") @@ -180,7 +199,11 @@ class CheckLine(CommonEqualityMixin): elif self.__isMatchAtStart(matchVariable): var = line[0:matchVariable.end()] line = line[matchVariable.end():] - lineParts.append(CheckElement.parseVariable(var)) + elem = CheckElement.parseVariable(var) + if self.variant == CheckLine.Variant.Not and elem.variant == CheckElement.Variant.VarDef: + raise Exception("CHECK-NOT check lines cannot define variables " + + "(line " + str(self.lineNo) + ")") + lineParts.append(elem) else: # If we're not currently looking at a special marker, this is a plain # text match all the way until the first special marker (or the end @@ -267,44 +290,101 @@ class CheckGroup(CommonEqualityMixin): def __headAndTail(self, list): return list[0], list[1:] - # The driver of matching inside a group. 
It simultaneously reads lines from - # the output and check groups and attempts to match them against each other - # in the correct order. + # Splits a list of check lines at index 'i' such that lines[i] is the first + # element whose variant is not equal to the given parameter. + def __splitByVariant(self, lines, variant): + i = 0 + while i < len(lines) and lines[i].variant == variant: + i += 1 + return lines[:i], lines[i:] + + # Extracts the first sequence of check lines which are independent of each + # other's match location, i.e. either consecutive DAG lines or a single + # InOrder line. Any Not lines preceeding this sequence are also extracted. + def __nextIndependentChecks(self, checkLines): + notChecks, checkLines = self.__splitByVariant(checkLines, CheckLine.Variant.Not) + if not checkLines: + return notChecks, [], [] + + head, tail = self.__headAndTail(checkLines) + if head.variant == CheckLine.Variant.InOrder: + return notChecks, [head], tail + else: + assert head.variant == CheckLine.Variant.DAG + independentChecks, checkLines = self.__splitByVariant(checkLines, CheckLine.Variant.DAG) + return notChecks, independentChecks, checkLines + + # If successful, returns the line number of the first output line matching the + # check line and the updated variable state. Otherwise returns -1 and None, + # respectively. The 'lineFilter' parameter can be used to supply a list of + # line numbers (counting from 1) which should be skipped. + def __findFirstMatch(self, checkLine, outputLines, lineFilter, varState): + matchLineNo = 0 + for outputLine in outputLines: + matchLineNo += 1 + if matchLineNo in lineFilter: + continue + newVarState = checkLine.match(outputLine, varState) + if newVarState is not None: + return matchLineNo, newVarState + return -1, None + + # Matches the given positive check lines against the output in order of + # appearance. Variable state is propagated but the scope of the search remains + # the same for all checks. Each output line can only be matched once. + # If all check lines are matched, the resulting variable state is returned + # together with the remaining output. The function also returns output lines + # which appear before either of the matched lines so they can be tested + # against Not checks. + def __matchIndependentChecks(self, checkLines, outputLines, varState): + # If no checks are provided, skip over the entire output. + if not checkLines: + return outputLines, varState, [] + + # Keep track of which lines have been matched. + matchedLines = [] + + # Find first unused output line which matches each check line. + for checkLine in checkLines: + matchLineNo, varState = self.__findFirstMatch(checkLine, outputLines, matchedLines, varState) + if varState is None: + raise Exception("Could not match line " + str(checkLine)) + matchedLines.append(matchLineNo) + + # Return new variable state and the output lines which lie outside the + # match locations of this independent group. + preceedingLines = outputLines[:min(matchedLines)-1] + remainingLines = outputLines[max(matchedLines):] + return preceedingLines, remainingLines, varState + + # Makes sure that the given check lines do not match any of the given output + # lines. Variable state does not change. 
+ def __matchNotLines(self, checkLines, outputLines, varState): + for checkLine in checkLines: + assert checkLine.variant == CheckLine.Variant.Not + matchLineNo, varState = self.__findFirstMatch(checkLine, outputLines, [], varState) + if varState is not None: + raise Exception("CHECK-NOT line " + str(checkLine) + " matches output") + + # Matches the check lines in this group against an output group. It is + # responsible for running the checks in the right order and scope, and + # for propagating the variable state between the check lines. def match(self, outputGroup): - readOutputLines = 0 - lastMatch = 0 - - # Check and output lines which remain to be matched. + varState = {} checkLines = self.lines outputLines = outputGroup.body - varState = {} - # Retrieve the next check line. while checkLines: - checkLine, checkLines = self.__headAndTail(checkLines) - foundMatch = False - - # Retrieve the next output line. - while outputLines: - outputLine, outputLines = self.__headAndTail(outputLines) - readOutputLines += 1 - - # Try to match the current lines against each other. If successful, - # save the new state of variables and continue to the next check line. - newVarState = checkLine.match(outputLine, varState) - if newVarState is not None: - varState = newVarState - lastMatch = readOutputLines - foundMatch = True - break - if not foundMatch: - raise Exception("Could not match check line \"" + checkLine.content + "\" from line " + - str(lastMatch+1) + " of the output. [vars=" + str(varState) + "]") - - @staticmethod - def parse(name, lines): - return CheckGroup(name, list(map(lambda line: CheckLine(line), lines))) - + # Extract the next sequence of location-independent checks to be matched. + notChecks, independentChecks, checkLines = self.__nextIndependentChecks(checkLines) + # Match the independent checks. + notOutput, outputLines, newVarState = \ + self.__matchIndependentChecks(independentChecks, outputLines, varState) + # Run the Not checks against the output lines which lie between the last + # two independent groups or the bounds of the output. + self.__matchNotLines(notChecks, notOutput, varState) + # Update variable state. + varState = newVarState class OutputGroup(CommonEqualityMixin): """Represents a named part of the test output against which a check group of @@ -378,20 +458,35 @@ class CheckFile(FileSplitMixin): return None def _processLine(self, line, lineNo): + # Lines beginning with 'CHECK-START' start a new check group. startLine = self._extractLine(self.prefix + "-START", line) if startLine is not None: - # Line starts with the CHECK-START keyword, start a new group - return (None, startLine) - else: - # Otherwise try to parse it as a standard CHECK line. If unsuccessful, - # _extractLine will return None and the line will be ignored. - return (self._extractLine(self.prefix, line), None) + return None, startLine + + # Lines starting only with 'CHECK' are matched in order. + plainLine = self._extractLine(self.prefix, line) + if plainLine is not None: + return (plainLine, CheckLine.Variant.InOrder), None + + # 'CHECK-DAG' lines are no-order assertions. + dagLine = self._extractLine(self.prefix + "-DAG", line) + if dagLine is not None: + return (dagLine, CheckLine.Variant.DAG), None + + # 'CHECK-NOT' lines are no-order negative assertions. + notLine = self._extractLine(self.prefix + "-NOT", line) + if notLine is not None: + return (notLine, CheckLine.Variant.Not), None + + # Other lines are ignored. 
+ return None, None def _exceptionLineOutsideGroup(self, line, lineNo): raise Exception("Check file line lies outside a group (line " + str(lineNo) + ")") def _processGroup(self, name, lines): - return CheckGroup.parse(name, lines) + checkLines = list(map(lambda line: CheckLine(line[0], line[1]), lines)) + return CheckGroup(name, checkLines) def match(self, outputFile, printInfo=False): for checkGroup in self.groups: diff --git a/tools/checker_test.py b/tools/checker_test.py index f69f9e3f2b..8947d8a076 100755 --- a/tools/checker_test.py +++ b/tools/checker_test.py @@ -64,6 +64,9 @@ class TestCheckLine_Parse(unittest.TestCase): def __parsesTo(self, string, expected): self.assertEqual(expected, self.__getRegex(self.__tryParse(string))) + def __tryParseNot(self, string): + return checker.CheckLine(string, checker.CheckLine.Variant.UnorderedNot) + def __parsesPattern(self, string, pattern): line = self.__tryParse(string) self.assertEqual(1, len(line.lineParts)) @@ -163,6 +166,9 @@ class TestCheckLine_Parse(unittest.TestCase): self.__parsesTo("{{abc}}{{def}}", "(abc)(def)") self.__parsesTo("[[ABC:abc]][[DEF:def]]", "(abc)(def)") + def test_NoVarDefsInNotChecks(self): + with self.assertRaises(Exception): + self.__tryParseNot("[[ABC:abc]]") class TestCheckLine_Match(unittest.TestCase): def __matchSingle(self, checkString, outputString, varState={}): @@ -228,9 +234,23 @@ class TestCheckLine_Match(unittest.TestCase): self.__notMatchSingle("[[X:..]]foo[[X]]", ".*fooAAAA") +CheckVariant = checker.CheckLine.Variant + +def prepareSingleCheck(line): + if isinstance(line, str): + return checker.CheckLine(line) + else: + return checker.CheckLine(line[0], line[1]) + +def prepareChecks(lines): + if isinstance(lines, str): + lines = lines.splitlines() + return list(map(lambda line: prepareSingleCheck(line), lines)) + + class TestCheckGroup_Match(unittest.TestCase): - def __matchMulti(self, checkString, outputString): - checkGroup = checker.CheckGroup.parse("MyGroup", checkString.splitlines()) + def __matchMulti(self, checkLines, outputString): + checkGroup = checker.CheckGroup("MyGroup", prepareChecks(checkLines)) outputGroup = checker.OutputGroup("MyGroup", outputString.splitlines()) return checkGroup.match(outputGroup) @@ -271,14 +291,62 @@ class TestCheckGroup_Match(unittest.TestCase): ### 1234 ###"""); def test_Ordering(self): - self.__matchMulti("""foo - bar""", + self.__matchMulti([("foo", CheckVariant.InOrder), + ("bar", CheckVariant.InOrder)], """foo bar""") - self.__notMatchMulti("""foo - bar""", + self.__notMatchMulti([("foo", CheckVariant.InOrder), + ("bar", CheckVariant.InOrder)], """bar foo""") + self.__matchMulti([("abc", CheckVariant.DAG), + ("def", CheckVariant.DAG)], + """abc + def""") + self.__matchMulti([("abc", CheckVariant.DAG), + ("def", CheckVariant.DAG)], + """def + abc""") + self.__matchMulti([("foo", CheckVariant.InOrder), + ("abc", CheckVariant.DAG), + ("def", CheckVariant.DAG), + ("bar", CheckVariant.InOrder)], + """foo + def + abc + bar""") + self.__notMatchMulti([("foo", CheckVariant.InOrder), + ("abc", CheckVariant.DAG), + ("def", CheckVariant.DAG), + ("bar", CheckVariant.InOrder)], + """foo + abc + bar""") + self.__notMatchMulti([("foo", CheckVariant.InOrder), + ("abc", CheckVariant.DAG), + ("def", CheckVariant.DAG), + ("bar", CheckVariant.InOrder)], + """foo + def + bar""") + + def test_NotAssertions(self): + self.__matchMulti([("foo", CheckVariant.Not)], + """abc + def""") + self.__notMatchMulti([("foo", CheckVariant.Not)], + """abc foo + def""") + + def 
test_LineOnlyMatchesOnce(self): + self.__matchMulti([("foo", CheckVariant.DAG), + ("foo", CheckVariant.DAG)], + """foo + foo""") + self.__notMatchMulti([("foo", CheckVariant.DAG), + ("foo", CheckVariant.DAG)], + """foo + bar""") class TestOutputFile_Parse(unittest.TestCase): def __parsesTo(self, string, expected): @@ -355,7 +423,7 @@ class TestCheckFile_Parse(unittest.TestCase): self.__parsesTo("""// CHECK-START: Example Group // CHECK: foo // CHECK: bar""", - [ checker.CheckGroup.parse("Example Group", [ "foo", "bar" ]) ]) + [ checker.CheckGroup("Example Group", prepareChecks([ "foo", "bar" ])) ]) def test_MultipleGroups(self): self.__parsesTo("""// CHECK-START: Example Group1 @@ -364,8 +432,20 @@ class TestCheckFile_Parse(unittest.TestCase): // CHECK-START: Example Group2 // CHECK: abc // CHECK: def""", - [ checker.CheckGroup.parse("Example Group1", [ "foo", "bar" ]), - checker.CheckGroup.parse("Example Group2", [ "abc", "def" ]) ]) + [ checker.CheckGroup("Example Group1", prepareChecks([ "foo", "bar" ])), + checker.CheckGroup("Example Group2", prepareChecks([ "abc", "def" ])) ]) + + def test_CheckVariants(self): + self.__parsesTo("""// CHECK-START: Example Group + // CHECK: foo + // CHECK-NOT: bar + // CHECK-DAG: abc + // CHECK-DAG: def""", + [ checker.CheckGroup("Example Group", + prepareChecks([ ("foo", CheckVariant.InOrder), + ("bar", CheckVariant.Not), + ("abc", CheckVariant.DAG), + ("def", CheckVariant.DAG) ])) ]) if __name__ == '__main__': unittest.main() |
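The checker.py and checker_test.py changes above add CHECK-DAG and CHECK-NOT variants: a consecutive run of DAG checks may match in any order, but each output line can satisfy at most one of them (test_LineOnlyMatchesOnce), and NOT checks are then verified against the output lines left between matched regions. A compact sketch of the DAG-matching idea, written in C++ rather than the tool's Python and using plain substring matching in place of the checker's pattern engine; MatchDagGroup is an illustrative name:

    #include <set>
    #include <string>
    #include <vector>

    // Each pattern must match some output line that no earlier pattern in the
    // group has already consumed; the order of the matches does not matter.
    static bool MatchDagGroup(const std::vector<std::string>& patterns,
                              const std::vector<std::string>& output) {
      std::set<size_t> used;  // Indices of output lines already consumed.
      for (const std::string& pattern : patterns) {
        bool matched = false;
        for (size_t i = 0; i < output.size(); ++i) {
          if (used.count(i) == 0 && output[i].find(pattern) != std::string::npos) {
            used.insert(i);
            matched = true;
            break;
          }
        }
        if (!matched) {
          return false;  // No unused line matches this CHECK-DAG: the group fails.
        }
      }
      return true;
    }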