Diffstat (limited to 'compiler')
321 files changed, 6918 insertions, 5728 deletions
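Note: the central refactoring in this change replaces the old `CompiledMethodStorage`/swap-space ownership of compiled artifacts in `libart-compiler` with a narrow `CompiledCodeStorage` callback interface that the caller (`dex2oat`, or a test fixture) implements; the resulting `CompiledMethod*` is opaque to the compiler. The sketch below condenses the `OneCompiledMethodStorage` helper added to `common_compiler_test.cc` into a minimal standalone implementation of that interface. The class name `SingleMethodStorage` is illustrative, and the include list is an approximation (the real helper relies on the test fixture's transitive includes), so treat this as a sketch rather than code from the patch.

// Sketch only: a single-method CompiledCodeStorage, condensed from the
// OneCompiledMethodStorage test helper added in common_compiler_test.cc.
#include <string>
#include <vector>

#include <android-base/logging.h>

#include "arch/instruction_set.h"
#include "base/array_ref.h"
#include "base/macros.h"
#include "driver/compiled_code_storage.h"
#include "linker/linker_patch.h"

namespace art HIDDEN {

class SingleMethodStorage final : public CompiledCodeStorage {
 public:
  CompiledMethod* CreateCompiledMethod(InstructionSet instruction_set,
                                       ArrayRef<const uint8_t> code,
                                       ArrayRef<const uint8_t> stack_map,
                                       ArrayRef<const uint8_t> cfi ATTRIBUTE_UNUSED,
                                       ArrayRef<const linker::LinkerPatch> patches,
                                       bool is_intrinsic ATTRIBUTE_UNUSED) override {
    // Holds exactly one compiled method; linker patches are not supported here.
    CHECK_EQ(instruction_set_, InstructionSet::kNone);
    CHECK(patches.empty());
    instruction_set_ = instruction_set;
    code_.assign(code.begin(), code.end());
    stack_map_.assign(stack_map.begin(), stack_map.end());
    // The returned CompiledMethod* is opaque to libart-compiler, so the storage
    // object itself can serve as the token handed back to the caller.
    return reinterpret_cast<CompiledMethod*>(this);
  }

  ArrayRef<const uint8_t> GetThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED,
                                       /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) override {
    LOG(FATAL) << "Unsupported.";
    UNREACHABLE();
  }

  void SetThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED,
                    ArrayRef<const uint8_t> code ATTRIBUTE_UNUSED,
                    const std::string& debug_name ATTRIBUTE_UNUSED) override {
    LOG(FATAL) << "Unsupported.";
    UNREACHABLE();
  }

  // Accessors used after compilation, e.g. to map the code executable.
  InstructionSet GetInstructionSet() const { return instruction_set_; }
  ArrayRef<const uint8_t> GetCode() const { return ArrayRef<const uint8_t>(code_); }
  ArrayRef<const uint8_t> GetStackMap() const { return ArrayRef<const uint8_t>(stack_map_); }

 private:
  InstructionSet instruction_set_ = InstructionSet::kNone;
  std::vector<uint8_t> code_;
  std::vector<uint8_t> stack_map_;
};

}  // namespace art

With this interface, `CommonCompilerTestImpl::CompileMethod()` (see below) creates such a storage on the stack, passes it to the compiler, and then retrieves the code and stack map from it to make the method executable, instead of unpacking a swap-allocated `CompiledMethod`.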
diff --git a/compiler/Android.bp b/compiler/Android.bp index de98fdb993..a879bd8f06 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -33,11 +33,8 @@ art_cc_defaults { defaults: ["art_defaults"], host_supported: true, srcs: [ - "compiled_method.cc", "debug/elf_debug_writer.cc", "dex/inline_method_analyser.cc", - "dex/verification_results.cc", - "driver/compiled_method_storage.cc", "driver/compiler_options.cc", "driver/dex_compilation_unit.cc", "jit/jit_compiler.cc", @@ -94,10 +91,10 @@ art_cc_defaults { "optimizing/ssa_phi_elimination.cc", "optimizing/stack_map_stream.cc", "optimizing/superblock_cloner.cc", + "optimizing/write_barrier_elimination.cc", "trampolines/trampoline_compiler.cc", "utils/assembler.cc", "utils/jni_macro_assembler.cc", - "utils/swap_space.cc", "compiler.cc", ], @@ -133,6 +130,11 @@ art_cc_defaults { "utils/arm64/managed_register_arm64.cc", ], }, + riscv64: { + srcs: [ + "utils/riscv64/managed_register_riscv64.cc", + ], + }, x86: { srcs: [ "jni/quick/x86/calling_convention_x86.cc", @@ -176,6 +178,8 @@ art_cc_defaults { ], export_include_dirs: ["."], + // Not using .map.txt because this is an internal API + version_script: "libart-compiler.map", } cc_defaults { @@ -228,7 +232,7 @@ art_cc_library { "libprofile", "libdexfile", ], - whole_static_libs: ["libelffile"], + static_libs: ["libelffile"], runtime_libs: [ // `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load // `libart-disassembler.so`. @@ -245,6 +249,7 @@ art_cc_library { apex_available: [ "com.android.art", "com.android.art.debug", + "test_broken_com.android.art", ], } @@ -296,7 +301,7 @@ art_cc_library { "libprofiled", "libdexfiled", ], - whole_static_libs: ["libelffiled"], + static_libs: ["libelffiled"], runtime_libs: [ // `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load // `libartd-disassembler.so`. @@ -369,6 +374,7 @@ art_cc_defaults { data: [ ":art-gtest-jars-ExceptionHandle", ":art-gtest-jars-Interfaces", + ":art-gtest-jars-Main", ":art-gtest-jars-MyClassNatives", ], tidy_timeout_srcs: [ @@ -381,9 +387,9 @@ art_cc_defaults { "optimizing/ssa_test.cc", ], srcs: [ + "compiler_reflection_test.cc", "debug/dwarf/dwarf_test.cc", "debug/src_map_elem_test.cc", - "driver/compiled_method_storage_test.cc", "exception_test.cc", "jni/jni_compiler_test.cc", "linker/linker_patch_test.cc", @@ -419,7 +425,6 @@ art_cc_defaults { "optimizing/suspend_check_test.cc", "utils/atomic_dex_ref_map_test.cc", "utils/dedupe_set_test.cc", - "utils/swap_space_test.cc", "jni/jni_cfi_test.cc", "optimizing/codegen_test.cc", @@ -442,6 +447,11 @@ art_cc_defaults { "utils/arm64/managed_register_arm64_test.cc", ], }, + riscv64: { + srcs: [ + "utils/riscv64/managed_register_riscv64_test.cc", + ], + }, x86: { srcs: [ "utils/x86/managed_register_x86_test.cc", @@ -465,8 +475,8 @@ art_cc_defaults { ], shared_libs: [ - "libbacktrace", "libnativeloader", + "libunwindstack", ], target: { @@ -488,10 +498,12 @@ art_cc_test { ], shared_libs: [ "libprofiled", - "libartd-compiler", "libartd-simulator-container", + "liblzma", ], static_libs: [ + "libartd-compiler", + "libelffiled", "libvixld", ], } @@ -506,7 +518,8 @@ art_cc_test { data: [":generate-boot-image"], shared_libs: [ "libprofile", - "libart-compiler", + "liblzma", + "libartpalette", ], static_libs: [ // For now, link `libart-simulator-container` statically for simplicity, @@ -515,6 +528,8 @@ art_cc_test { // TODO(b/192070541): Consider linking `libart-simulator-container` // dynamically. 
"libart-simulator-container", + "libart-compiler", + "libelffile", "libvixl", ], test_config: "art_standalone_compiler_tests.xml", @@ -548,9 +563,11 @@ art_cc_test { }, }, shared_libs: [ - "libartd-compiler", + "liblzma", ], static_libs: [ + "libartd-compiler", + "libelffiled", "libvixld", ], } diff --git a/compiler/art_standalone_compiler_tests.xml b/compiler/art_standalone_compiler_tests.xml index f723971928..394ac8d4fb 100644 --- a/compiler/art_standalone_compiler_tests.xml +++ b/compiler/art_standalone_compiler_tests.xml @@ -14,6 +14,8 @@ limitations under the License. --> <configuration description="Runs art_standalone_compiler_tests."> + <option name="config-descriptor:metadata" key="mainline-param" value="com.google.android.art.apex" /> + <target_preparer class="com.android.compatibility.common.tradefed.targetprep.FilePusher"> <option name="cleanup" value="true" /> <option name="push" value="art_standalone_compiler_tests->/data/local/tmp/art_standalone_compiler_tests/art_standalone_compiler_tests" /> @@ -24,6 +26,7 @@ <option name="cleanup" value="true" /> <option name="push" value="art-gtest-jars-ExceptionHandle.jar->/data/local/tmp/art_standalone_compiler_tests/art-gtest-jars-ExceptionHandle.jar" /> <option name="push" value="art-gtest-jars-Interfaces.jar->/data/local/tmp/art_standalone_compiler_tests/art-gtest-jars-Interfaces.jar" /> + <option name="push" value="art-gtest-jars-Main.jar->/data/local/tmp/art_standalone_compiler_tests/art-gtest-jars-Main.jar" /> <option name="push" value="art-gtest-jars-MyClassNatives.jar->/data/local/tmp/art_standalone_compiler_tests/art-gtest-jars-MyClassNatives.jar" /> </target_preparer> diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h index 9755ef12d0..e65bee8e2e 100644 --- a/compiler/cfi_test.h +++ b/compiler/cfi_test.h @@ -23,6 +23,7 @@ #include "arch/instruction_set.h" #include "base/enums.h" +#include "base/macros.h" #include "debug/dwarf/dwarf_test.h" #include "disassembler.h" #include "dwarf/dwarf_constants.h" @@ -30,7 +31,7 @@ #include "gtest/gtest.h" #include "thread.h" -namespace art { +namespace art HIDDEN { class CFITest : public dwarf::DwarfTest { public: diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index bbb2016566..442b96e5fa 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -28,10 +28,8 @@ #include "base/memfd.h" #include "base/utils.h" #include "class_linker.h" -#include "compiled_method-inl.h" #include "dex/descriptors_names.h" -#include "dex/verification_results.h" -#include "driver/compiled_method_storage.h" +#include "driver/compiled_code_storage.h" #include "driver/compiler_options.h" #include "jni/java_vm_ext.h" #include "interpreter/interpreter.h" @@ -44,7 +42,7 @@ #include "thread-current-inl.h" #include "utils/atomic_dex_ref_map-inl.h" -namespace art { +namespace art HIDDEN { class CommonCompilerTestImpl::CodeAndMetadata { public: @@ -58,10 +56,10 @@ class CommonCompilerTestImpl::CodeAndMetadata { const uint32_t vmap_table_offset = vmap_table.empty() ? 
0u : sizeof(OatQuickMethodHeader) + vmap_table.size(); OatQuickMethodHeader method_header(vmap_table_offset); - const size_t code_alignment = GetInstructionSetAlignment(instruction_set); + const size_t code_alignment = GetInstructionSetCodeAlignment(instruction_set); DCHECK_ALIGNED_PARAM(kPageSize, code_alignment); - code_offset_ = RoundUp(vmap_table.size() + sizeof(method_header), code_alignment); - const uint32_t capacity = RoundUp(code_offset_ + code_size, kPageSize); + const uint32_t code_offset = RoundUp(vmap_table.size() + sizeof(method_header), code_alignment); + const uint32_t capacity = RoundUp(code_offset + code_size, kPageSize); // Create a memfd handle with sufficient capacity. android::base::unique_fd mem_fd(art::memfd_create_compat("test code", /*flags=*/ 0)); @@ -82,12 +80,12 @@ class CommonCompilerTestImpl::CodeAndMetadata { CHECK(rw_map_.IsValid()) << error_msg; // Store data. - uint8_t* code_addr = rw_map_.Begin() + code_offset_; + uint8_t* code_addr = rw_map_.Begin() + code_offset; CHECK_ALIGNED_PARAM(code_addr, code_alignment); - CHECK_LE(vmap_table_offset, code_offset_); + CHECK_LE(vmap_table_offset, code_offset); memcpy(code_addr - vmap_table_offset, vmap_table.data(), vmap_table.size()); static_assert(std::is_trivially_copyable<OatQuickMethodHeader>::value, "Cannot use memcpy"); - CHECK_LE(sizeof(method_header), code_offset_); + CHECK_LE(sizeof(method_header), code_offset); memcpy(code_addr - sizeof(method_header), &method_header, sizeof(method_header)); CHECK_LE(code_size, static_cast<size_t>(rw_map_.End() - code_addr)); memcpy(code_addr, code.data(), code_size); @@ -108,22 +106,84 @@ class CommonCompilerTestImpl::CodeAndMetadata { /*filename=*/ "test code", &error_msg); CHECK(rx_map_.IsValid()) << error_msg; + + DCHECK_LT(code_offset, rx_map_.Size()); + size_t adjustment = GetInstructionSetEntryPointAdjustment(instruction_set); + entry_point_ = rx_map_.Begin() + code_offset + adjustment; } - const void* GetCodePointer() const { + const void* GetEntryPoint() const { DCHECK(rx_map_.IsValid()); - DCHECK_LE(code_offset_, rx_map_.Size()); - return rx_map_.Begin() + code_offset_; + return entry_point_; } private: MemMap rw_map_; MemMap rx_map_; - uint32_t code_offset_; + const void* entry_point_; DISALLOW_COPY_AND_ASSIGN(CodeAndMetadata); }; +class CommonCompilerTestImpl::OneCompiledMethodStorage final : public CompiledCodeStorage { + public: + OneCompiledMethodStorage() {} + ~OneCompiledMethodStorage() {} + + CompiledMethod* CreateCompiledMethod(InstructionSet instruction_set, + ArrayRef<const uint8_t> code, + ArrayRef<const uint8_t> stack_map, + ArrayRef<const uint8_t> cfi ATTRIBUTE_UNUSED, + ArrayRef<const linker::LinkerPatch> patches, + bool is_intrinsic ATTRIBUTE_UNUSED) override { + // Supports only one method at a time. 
+ CHECK_EQ(instruction_set_, InstructionSet::kNone); + CHECK_NE(instruction_set, InstructionSet::kNone); + instruction_set_ = instruction_set; + CHECK(code_.empty()); + CHECK(!code.empty()); + code_.assign(code.begin(), code.end()); + CHECK(stack_map_.empty()); + CHECK(!stack_map.empty()); + stack_map_.assign(stack_map.begin(), stack_map.end()); + CHECK(patches.empty()) << "Linker patches are unsupported for compiler gtests."; + return reinterpret_cast<CompiledMethod*>(this); + } + + ArrayRef<const uint8_t> GetThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED, + /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) override { + LOG(FATAL) << "Unsupported."; + UNREACHABLE(); + } + + void SetThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED, + ArrayRef<const uint8_t> code ATTRIBUTE_UNUSED, + const std::string& debug_name ATTRIBUTE_UNUSED) override { + LOG(FATAL) << "Unsupported."; + UNREACHABLE(); + } + + InstructionSet GetInstructionSet() const { + CHECK_NE(instruction_set_, InstructionSet::kNone); + return instruction_set_; + } + + ArrayRef<const uint8_t> GetCode() const { + CHECK(!code_.empty()); + return ArrayRef<const uint8_t>(code_); + } + + ArrayRef<const uint8_t> GetStackMap() const { + CHECK(!stack_map_.empty()); + return ArrayRef<const uint8_t>(stack_map_); + } + + private: + InstructionSet instruction_set_ = InstructionSet::kNone; + std::vector<uint8_t> code_; + std::vector<uint8_t> stack_map_; +}; + std::unique_ptr<CompilerOptions> CommonCompilerTestImpl::CreateCompilerOptions( InstructionSet instruction_set, const std::string& variant) { std::unique_ptr<CompilerOptions> compiler_options = std::make_unique<CompilerOptions>(); @@ -143,24 +203,7 @@ const void* CommonCompilerTestImpl::MakeExecutable(ArrayRef<const uint8_t> code, InstructionSet instruction_set) { CHECK_NE(code.size(), 0u); code_and_metadata_.emplace_back(code, vmap_table, instruction_set); - return code_and_metadata_.back().GetCodePointer(); -} - -void CommonCompilerTestImpl::MakeExecutable(ArtMethod* method, - const CompiledMethod* compiled_method) { - CHECK(method != nullptr); - const void* method_code = nullptr; - // If the code size is 0 it means the method was skipped due to profile guided compilation. 
- if (compiled_method != nullptr && compiled_method->GetQuickCode().size() != 0u) { - const void* code_ptr = MakeExecutable(compiled_method->GetQuickCode(), - compiled_method->GetVmapTable(), - compiled_method->GetInstructionSet()); - method_code = - CompiledMethod::CodePointer(code_ptr, compiled_method->GetInstructionSet()); - LOG(INFO) << "MakeExecutable " << method->PrettyMethod() << " code=" << method_code; - } - Runtime::Current()->GetInstrumentation()->InitializeMethodsCode( - method, /*aot_code=*/ method_code); + return code_and_metadata_.back().GetEntryPoint(); } void CommonCompilerTestImpl::SetUp() { @@ -207,7 +250,6 @@ void CommonCompilerTestImpl::OverrideInstructionSetFeatures(InstructionSet instr void CommonCompilerTestImpl::SetUpRuntimeOptionsImpl() { compiler_options_.reset(new CompilerOptions); - verification_results_.reset(new VerificationResults()); ApplyInstructionSet(); } @@ -221,7 +263,6 @@ void CommonCompilerTestImpl::SetCompilerKind(Compiler::Kind compiler_kind) { void CommonCompilerTestImpl::TearDown() { code_and_metadata_.clear(); - verification_results_.reset(); compiler_options_.reset(); } @@ -229,7 +270,7 @@ void CommonCompilerTestImpl::CompileMethod(ArtMethod* method) { CHECK(method != nullptr); TimingLogger timings("CommonCompilerTestImpl::CompileMethod", false, false); TimingLogger::ScopedTiming t(__FUNCTION__, &timings); - CompiledMethodStorage storage(/*swap_fd=*/ -1); + OneCompiledMethodStorage storage; CompiledMethod* compiled_method = nullptr; { DCHECK(!Runtime::Current()->IsStarted()); @@ -241,7 +282,6 @@ void CommonCompilerTestImpl::CompileMethod(ArtMethod* method) { Handle<mirror::DexCache> dex_cache = hs.NewHandle(GetClassLinker()->FindDexCache(self, dex_file)); Handle<mirror::ClassLoader> class_loader = hs.NewHandle(method->GetClassLoader()); - compiler_options_->verification_results_ = verification_results_.get(); if (method->IsNative()) { compiled_method = compiler->JniCompile(method->GetAccessFlags(), method->GetDexMethodIndex(), @@ -257,48 +297,17 @@ void CommonCompilerTestImpl::CompileMethod(ArtMethod* method) { dex_file, dex_cache); } - compiler_options_->verification_results_ = nullptr; + CHECK(compiled_method != nullptr) << "Failed to compile " << method->PrettyMethod(); + CHECK_EQ(reinterpret_cast<OneCompiledMethodStorage*>(compiled_method), &storage); } - CHECK(method != nullptr); { TimingLogger::ScopedTiming t2("MakeExecutable", &timings); - MakeExecutable(method, compiled_method); + const void* method_code = MakeExecutable(storage.GetCode(), + storage.GetStackMap(), + storage.GetInstructionSet()); + LOG(INFO) << "MakeExecutable " << method->PrettyMethod() << " code=" << method_code; + GetRuntime()->GetInstrumentation()->InitializeMethodsCode(method, /*aot_code=*/ method_code); } - CompiledMethod::ReleaseSwapAllocatedCompiledMethod(&storage, compiled_method); -} - -void CommonCompilerTestImpl::CompileDirectMethod(Handle<mirror::ClassLoader> class_loader, - const char* class_name, - const char* method_name, - const char* signature) { - std::string class_descriptor(DotToDescriptor(class_name)); - Thread* self = Thread::Current(); - ClassLinker* class_linker = GetClassLinker(); - ObjPtr<mirror::Class> klass = - class_linker->FindClass(self, class_descriptor.c_str(), class_loader); - CHECK(klass != nullptr) << "Class not found " << class_name; - auto pointer_size = class_linker->GetImagePointerSize(); - ArtMethod* method = klass->FindClassMethod(method_name, signature, pointer_size); - CHECK(method != nullptr && method->IsDirect()) << "Direct 
method not found: " - << class_name << "." << method_name << signature; - CompileMethod(method); -} - -void CommonCompilerTestImpl::CompileVirtualMethod(Handle<mirror::ClassLoader> class_loader, - const char* class_name, - const char* method_name, - const char* signature) { - std::string class_descriptor(DotToDescriptor(class_name)); - Thread* self = Thread::Current(); - ClassLinker* class_linker = GetClassLinker(); - ObjPtr<mirror::Class> klass = - class_linker->FindClass(self, class_descriptor.c_str(), class_loader); - CHECK(klass != nullptr) << "Class not found " << class_name; - auto pointer_size = class_linker->GetImagePointerSize(); - ArtMethod* method = klass->FindClassMethod(method_name, signature, pointer_size); - CHECK(method != nullptr && !method->IsDirect()) << "Virtual method not found: " - << class_name << "." << method_name << signature; - CompileMethod(method); } void CommonCompilerTestImpl::ClearBootImageOption() { diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index 89cc1fa28f..f3cd13296c 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -24,25 +24,25 @@ #include "arch/instruction_set.h" #include "arch/instruction_set_features.h" +#include "base/macros.h" #include "common_runtime_test.h" #include "compiler.h" #include "oat_file.h" -namespace art { +namespace art HIDDEN { namespace mirror { class ClassLoader; } // namespace mirror -class CompiledMethod; class CompilerOptions; class CumulativeLogger; class DexFile; class TimingLogger; -class VerificationResults; template<class T> class Handle; -class CommonCompilerTestImpl { +// Export all symbols in `CommonCompilerTestImpl` for dex2oat tests. +class EXPORT CommonCompilerTestImpl { public: static std::unique_ptr<CompilerOptions> CreateCompilerOptions(InstructionSet instruction_set, const std::string& variant); @@ -55,9 +55,6 @@ class CommonCompilerTestImpl { ArrayRef<const uint8_t> vmap_table, InstructionSet instruction_set); - void MakeExecutable(ArtMethod* method, const CompiledMethod* compiled_method) - REQUIRES_SHARED(Locks::mutator_lock_); - protected: void SetUp(); @@ -74,14 +71,6 @@ class CommonCompilerTestImpl { void CompileMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_); - void CompileDirectMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name, - const char* method_name, const char* signature) - REQUIRES_SHARED(Locks::mutator_lock_); - - void CompileVirtualMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name, - const char* method_name, const char* signature) - REQUIRES_SHARED(Locks::mutator_lock_); - void ApplyInstructionSet(); void OverrideInstructionSetFeatures(InstructionSet instruction_set, const std::string& variant); @@ -96,7 +85,6 @@ class CommonCompilerTestImpl { = InstructionSetFeatures::FromCppDefines(); std::unique_ptr<CompilerOptions> compiler_options_; - std::unique_ptr<VerificationResults> verification_results_; protected: virtual ClassLinker* GetClassLinker() = 0; @@ -104,6 +92,8 @@ class CommonCompilerTestImpl { private: class CodeAndMetadata; + class OneCompiledMethodStorage; + std::vector<CodeAndMetadata> code_and_metadata_; }; diff --git a/compiler/compiled_method-inl.h b/compiler/compiled_method-inl.h deleted file mode 100644 index e60b30fed2..0000000000 --- a/compiler/compiled_method-inl.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (C) 2017 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this 
file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_COMPILED_METHOD_INL_H_ -#define ART_COMPILER_COMPILED_METHOD_INL_H_ - -#include "compiled_method.h" - -#include "base/array_ref.h" -#include "base/length_prefixed_array.h" -#include "linker/linker_patch.h" - -namespace art { - -inline ArrayRef<const uint8_t> CompiledCode::GetQuickCode() const { - return GetArray(quick_code_); -} - -template <typename T> -inline ArrayRef<const T> CompiledCode::GetArray(const LengthPrefixedArray<T>* array) { - if (array == nullptr) { - return ArrayRef<const T>(); - } - DCHECK_NE(array->size(), 0u); - return ArrayRef<const T>(&array->At(0), array->size()); -} - -inline ArrayRef<const uint8_t> CompiledMethod::GetVmapTable() const { - return GetArray(vmap_table_); -} - -inline ArrayRef<const uint8_t> CompiledMethod::GetCFIInfo() const { - return GetArray(cfi_info_); -} - -inline ArrayRef<const linker::LinkerPatch> CompiledMethod::GetPatches() const { - return GetArray(patches_); -} - -} // namespace art - -#endif // ART_COMPILER_COMPILED_METHOD_INL_H_ diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc deleted file mode 100644 index 03b87ef09e..0000000000 --- a/compiler/compiled_method.cc +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "compiled_method.h" - -#include "driver/compiled_method_storage.h" -#include "utils/swap_space.h" - -namespace art { - -CompiledCode::CompiledCode(CompiledMethodStorage* storage, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code) - : storage_(storage), - quick_code_(storage->DeduplicateCode(quick_code)), - packed_fields_(InstructionSetField::Encode(instruction_set)) { -} - -CompiledCode::~CompiledCode() { - GetStorage()->ReleaseCode(quick_code_); -} - -bool CompiledCode::operator==(const CompiledCode& rhs) const { - if (quick_code_ != nullptr) { - if (rhs.quick_code_ == nullptr) { - return false; - } else if (quick_code_->size() != rhs.quick_code_->size()) { - return false; - } else { - return std::equal(quick_code_->begin(), quick_code_->end(), rhs.quick_code_->begin()); - } - } - return (rhs.quick_code_ == nullptr); -} - -size_t CompiledCode::AlignCode(size_t offset) const { - return AlignCode(offset, GetInstructionSet()); -} - -size_t CompiledCode::AlignCode(size_t offset, InstructionSet instruction_set) { - return RoundUp(offset, GetInstructionSetAlignment(instruction_set)); -} - -size_t CompiledCode::CodeDelta() const { - return CodeDelta(GetInstructionSet()); -} - -size_t CompiledCode::CodeDelta(InstructionSet instruction_set) { - switch (instruction_set) { - case InstructionSet::kArm: - case InstructionSet::kArm64: - case InstructionSet::kX86: - case InstructionSet::kX86_64: - return 0; - case InstructionSet::kThumb2: { - // +1 to set the low-order bit so a BLX will switch to Thumb mode - return 1; - } - default: - LOG(FATAL) << "Unknown InstructionSet: " << instruction_set; - UNREACHABLE(); - } -} - -const void* CompiledCode::CodePointer(const void* code_pointer, InstructionSet instruction_set) { - switch (instruction_set) { - case InstructionSet::kArm: - case InstructionSet::kArm64: - case InstructionSet::kX86: - case InstructionSet::kX86_64: - return code_pointer; - case InstructionSet::kThumb2: { - uintptr_t address = reinterpret_cast<uintptr_t>(code_pointer); - // Set the low-order bit so a BLX will switch to Thumb mode - address |= 0x1; - return reinterpret_cast<const void*>(address); - } - default: - LOG(FATAL) << "Unknown InstructionSet: " << instruction_set; - UNREACHABLE(); - } -} - -CompiledMethod::CompiledMethod(CompiledMethodStorage* storage, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const ArrayRef<const uint8_t>& vmap_table, - const ArrayRef<const uint8_t>& cfi_info, - const ArrayRef<const linker::LinkerPatch>& patches) - : CompiledCode(storage, instruction_set, quick_code), - vmap_table_(storage->DeduplicateVMapTable(vmap_table)), - cfi_info_(storage->DeduplicateCFIInfo(cfi_info)), - patches_(storage->DeduplicateLinkerPatches(patches)) { -} - -CompiledMethod* CompiledMethod::SwapAllocCompiledMethod( - CompiledMethodStorage* storage, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const ArrayRef<const uint8_t>& vmap_table, - const ArrayRef<const uint8_t>& cfi_info, - const ArrayRef<const linker::LinkerPatch>& patches) { - SwapAllocator<CompiledMethod> alloc(storage->GetSwapSpaceAllocator()); - CompiledMethod* ret = alloc.allocate(1); - alloc.construct(ret, - storage, - instruction_set, - quick_code, - vmap_table, - cfi_info, patches); - return ret; -} - -void CompiledMethod::ReleaseSwapAllocatedCompiledMethod(CompiledMethodStorage* storage, - CompiledMethod* m) { - SwapAllocator<CompiledMethod> alloc(storage->GetSwapSpaceAllocator()); - alloc.destroy(m); 
- alloc.deallocate(m, 1); -} - -CompiledMethod::~CompiledMethod() { - CompiledMethodStorage* storage = GetStorage(); - storage->ReleaseLinkerPatches(patches_); - storage->ReleaseCFIInfo(cfi_info_); - storage->ReleaseVMapTable(vmap_table_); -} - -} // namespace art diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h deleted file mode 100644 index e92777ff12..0000000000 --- a/compiler/compiled_method.h +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_COMPILED_METHOD_H_ -#define ART_COMPILER_COMPILED_METHOD_H_ - -#include <memory> -#include <string> -#include <vector> - -#include "arch/instruction_set.h" -#include "base/bit_field.h" -#include "base/bit_utils.h" - -namespace art { - -template <typename T> class ArrayRef; -class CompiledMethodStorage; -template<typename T> class LengthPrefixedArray; - -namespace linker { -class LinkerPatch; -} // namespace linker - -class CompiledCode { - public: - // For Quick to supply an code blob - CompiledCode(CompiledMethodStorage* storage, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code); - - virtual ~CompiledCode(); - - InstructionSet GetInstructionSet() const { - return GetPackedField<InstructionSetField>(); - } - - ArrayRef<const uint8_t> GetQuickCode() const; - - bool operator==(const CompiledCode& rhs) const; - - // To align an offset from a page-aligned value to make it suitable - // for code storage. For example on ARM, to ensure that PC relative - // valu computations work out as expected. - size_t AlignCode(size_t offset) const; - static size_t AlignCode(size_t offset, InstructionSet instruction_set); - - // returns the difference between the code address and a usable PC. - // mainly to cope with kThumb2 where the lower bit must be set. - size_t CodeDelta() const; - static size_t CodeDelta(InstructionSet instruction_set); - - // Returns a pointer suitable for invoking the code at the argument - // code_pointer address. Mainly to cope with kThumb2 where the - // lower bit must be set to indicate Thumb mode. 
- static const void* CodePointer(const void* code_pointer, InstructionSet instruction_set); - - protected: - static constexpr size_t kInstructionSetFieldSize = - MinimumBitsToStore(static_cast<size_t>(InstructionSet::kLast)); - static constexpr size_t kNumberOfCompiledCodePackedBits = kInstructionSetFieldSize; - static constexpr size_t kMaxNumberOfPackedBits = sizeof(uint32_t) * kBitsPerByte; - - template <typename T> - static ArrayRef<const T> GetArray(const LengthPrefixedArray<T>* array); - - CompiledMethodStorage* GetStorage() { - return storage_; - } - - template <typename BitFieldType> - typename BitFieldType::value_type GetPackedField() const { - return BitFieldType::Decode(packed_fields_); - } - - template <typename BitFieldType> - void SetPackedField(typename BitFieldType::value_type value) { - DCHECK(IsUint<BitFieldType::size>(static_cast<uintptr_t>(value))); - packed_fields_ = BitFieldType::Update(value, packed_fields_); - } - - private: - using InstructionSetField = BitField<InstructionSet, 0u, kInstructionSetFieldSize>; - - CompiledMethodStorage* const storage_; - - // Used to store the compiled code. - const LengthPrefixedArray<uint8_t>* const quick_code_; - - uint32_t packed_fields_; -}; - -class CompiledMethod final : public CompiledCode { - public: - // Constructs a CompiledMethod. - // Note: Consider using the static allocation methods below that will allocate the CompiledMethod - // in the swap space. - CompiledMethod(CompiledMethodStorage* storage, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const ArrayRef<const uint8_t>& vmap_table, - const ArrayRef<const uint8_t>& cfi_info, - const ArrayRef<const linker::LinkerPatch>& patches); - - virtual ~CompiledMethod(); - - static CompiledMethod* SwapAllocCompiledMethod( - CompiledMethodStorage* storage, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const ArrayRef<const uint8_t>& vmap_table, - const ArrayRef<const uint8_t>& cfi_info, - const ArrayRef<const linker::LinkerPatch>& patches); - - static void ReleaseSwapAllocatedCompiledMethod(CompiledMethodStorage* storage, CompiledMethod* m); - - bool IsIntrinsic() const { - return GetPackedField<IsIntrinsicField>(); - } - - // Marks the compiled method as being generated using an intrinsic codegen. - // Such methods have no relationships to their code items. - // This affects debug information generated at link time. - void MarkAsIntrinsic() { - DCHECK(!IsIntrinsic()); - SetPackedField<IsIntrinsicField>(/* value= */ true); - } - - ArrayRef<const uint8_t> GetVmapTable() const; - - ArrayRef<const uint8_t> GetCFIInfo() const; - - ArrayRef<const linker::LinkerPatch> GetPatches() const; - - private: - static constexpr size_t kIsIntrinsicLsb = kNumberOfCompiledCodePackedBits; - static constexpr size_t kIsIntrinsicSize = 1u; - static constexpr size_t kNumberOfCompiledMethodPackedBits = kIsIntrinsicLsb + kIsIntrinsicSize; - static_assert(kNumberOfCompiledMethodPackedBits <= CompiledCode::kMaxNumberOfPackedBits, - "Too many packed fields."); - - using IsIntrinsicField = BitField<bool, kIsIntrinsicLsb, kIsIntrinsicSize>; - - // For quick code, holds code infos which contain stack maps, inline information, and etc. - const LengthPrefixedArray<uint8_t>* const vmap_table_; - // For quick code, a FDE entry for the debug_frame section. - const LengthPrefixedArray<uint8_t>* const cfi_info_; - // For quick code, linker patches needed by the method. 
- const LengthPrefixedArray<linker::LinkerPatch>* const patches_; -}; - -} // namespace art - -#endif // ART_COMPILER_COMPILED_METHOD_H_ diff --git a/compiler/compiler.cc b/compiler/compiler.cc index 98d73396bc..e2587c1253 100644 --- a/compiler/compiler.cc +++ b/compiler/compiler.cc @@ -25,10 +25,10 @@ #include "oat.h" #include "optimizing/optimizing_compiler.h" -namespace art { +namespace art HIDDEN { Compiler* Compiler::Create(const CompilerOptions& compiler_options, - CompiledMethodStorage* storage, + CompiledCodeStorage* storage, Compiler::Kind kind) { // Check that oat version when runtime was compiled matches the oat version of the compiler. constexpr std::array<uint8_t, 4> compiler_oat_version = OatHeader::kOatVersion; diff --git a/compiler/compiler.h b/compiler/compiler.h index afa0dbab60..ce785bb769 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -17,12 +17,13 @@ #ifndef ART_COMPILER_COMPILER_H_ #define ART_COMPILER_COMPILER_H_ +#include "base/macros.h" #include "base/mutex.h" #include "base/os.h" #include "compilation_kind.h" #include "dex/invoke_type.h" -namespace art { +namespace art HIDDEN { namespace dex { struct CodeItem; @@ -38,8 +39,8 @@ class DexCache; } // namespace mirror class ArtMethod; +class CompiledCodeStorage; class CompiledMethod; -class CompiledMethodStorage; class CompilerOptions; class DexFile; template<class T> class Handle; @@ -52,9 +53,9 @@ class Compiler { kOptimizing }; - static Compiler* Create(const CompilerOptions& compiler_options, - CompiledMethodStorage* storage, - Kind kind); + EXPORT static Compiler* Create(const CompilerOptions& compiler_options, + CompiledCodeStorage* storage, + Kind kind); virtual bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const = 0; @@ -99,7 +100,7 @@ class Compiler { protected: Compiler(const CompilerOptions& compiler_options, - CompiledMethodStorage* storage, + CompiledCodeStorage* storage, uint64_t warning) : compiler_options_(compiler_options), storage_(storage), @@ -110,13 +111,13 @@ class Compiler { return compiler_options_; } - CompiledMethodStorage* GetCompiledMethodStorage() const { + CompiledCodeStorage* GetCompiledCodeStorage() const { return storage_; } private: const CompilerOptions& compiler_options_; - CompiledMethodStorage* const storage_; + CompiledCodeStorage* const storage_; const uint64_t maximum_compilation_time_before_warning_; DISALLOW_COPY_AND_ASSIGN(Compiler); diff --git a/compiler/compiler_reflection_test.cc b/compiler/compiler_reflection_test.cc new file mode 100644 index 0000000000..f3c07db136 --- /dev/null +++ b/compiler/compiler_reflection_test.cc @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "reflection.h" + +#include "base/macros.h" +#include "class_linker.h" +#include "common_compiler_test.h" +#include "handle_scope-inl.h" +#include "jni/jni_internal.h" +#include "mirror/class.h" +#include "mirror/class_loader.h" + +namespace art HIDDEN { + +class CompilerReflectionTest : public CommonCompilerTest {}; + +TEST_F(CompilerReflectionTest, StaticMainMethod) { + ScopedObjectAccess soa(Thread::Current()); + jobject jclass_loader = LoadDex("Main"); + StackHandleScope<1> hs(soa.Self()); + Handle<mirror::ClassLoader> class_loader( + hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader))); + + ObjPtr<mirror::Class> klass = class_linker_->FindClass(soa.Self(), "LMain;", class_loader); + ASSERT_TRUE(klass != nullptr); + + ArtMethod* method = klass->FindClassMethod("main", + "([Ljava/lang/String;)V", + kRuntimePointerSize); + ASSERT_TRUE(method != nullptr); + ASSERT_TRUE(method->IsStatic()); + + CompileMethod(method); + + // Start runtime. + bool started = runtime_->Start(); + CHECK(started); + soa.Self()->TransitionFromSuspendedToRunnable(); + + jvalue args[1]; + args[0].l = nullptr; + InvokeWithJValues(soa, nullptr, jni::EncodeArtMethod(method), args); +} + +} // namespace art diff --git a/compiler/debug/debug_info.h b/compiler/debug/debug_info.h index 04c6991ea3..4027f114ec 100644 --- a/compiler/debug/debug_info.h +++ b/compiler/debug/debug_info.h @@ -20,9 +20,10 @@ #include <map> #include "base/array_ref.h" +#include "base/macros.h" #include "method_debug_info.h" -namespace art { +namespace art HIDDEN { class DexFile; namespace debug { diff --git a/compiler/debug/dwarf/dwarf_test.cc b/compiler/debug/dwarf/dwarf_test.cc index 8897e45584..14c92b2380 100644 --- a/compiler/debug/dwarf/dwarf_test.cc +++ b/compiler/debug/dwarf/dwarf_test.cc @@ -23,7 +23,7 @@ #include "dwarf/headers.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { namespace dwarf { // Run the tests only on host since we need objdump. 
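Note: the pervasive `namespace art {` → `namespace art HIDDEN {` edits throughout this change, together with `EXPORT` on a handful of classes and functions and the new `version_script: "libart-compiler.map"` entry in Android.bp, hide the now-internal compiler API from other shared libraries. The diff consistently adds `#include "base/macros.h"` next to these edits, which is where the macros are expected to come from; their exact definitions are not part of this diff, so the sketch below is an assumption about what they typically expand to with Clang/GCC.

// Illustrative only: plausible visibility macros behind HIDDEN/EXPORT
// (actual definitions are expected in ART's base/macros.h, not in this diff).
#define HIDDEN __attribute__((visibility("hidden")))
#define EXPORT __attribute__((visibility("default")))

namespace art HIDDEN {                  // hidden visibility becomes the default here
class EXPORT CommonCompilerTestImpl;    // selectively re-exported, e.g. for dex2oat tests
}  // namespace art

Applying the attribute at the namespace declaration makes hidden visibility the default for everything inside it, so only symbols explicitly tagged `EXPORT` (or kept by the version script) remain visible across the `libart-compiler` boundary.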
diff --git a/compiler/debug/dwarf/dwarf_test.h b/compiler/debug/dwarf/dwarf_test.h index bad986a330..1a0a798d74 100644 --- a/compiler/debug/dwarf/dwarf_test.h +++ b/compiler/debug/dwarf/dwarf_test.h @@ -26,6 +26,7 @@ #include <set> #include <string> +#include "base/macros.h" #include "base/os.h" #include "base/unix_file/fd_file.h" #include "common_compiler_test.h" @@ -33,7 +34,7 @@ #include "gtest/gtest.h" #include "stream/file_output_stream.h" -namespace art { +namespace art HIDDEN { namespace dwarf { #define DW_CHECK(substring) Check(substring, false, __FILE__, __LINE__) diff --git a/compiler/debug/elf_compilation_unit.h b/compiler/debug/elf_compilation_unit.h index b1d89ebeb2..1d7523c350 100644 --- a/compiler/debug/elf_compilation_unit.h +++ b/compiler/debug/elf_compilation_unit.h @@ -19,9 +19,10 @@ #include <vector> +#include "base/macros.h" #include "debug/method_debug_info.h" -namespace art { +namespace art HIDDEN { namespace debug { struct ElfCompilationUnit { diff --git a/compiler/debug/elf_debug_frame_writer.h b/compiler/debug/elf_debug_frame_writer.h index 094e8871b9..6b72262e26 100644 --- a/compiler/debug/elf_debug_frame_writer.h +++ b/compiler/debug/elf_debug_frame_writer.h @@ -20,13 +20,14 @@ #include <vector> #include "arch/instruction_set.h" +#include "base/macros.h" #include "debug/method_debug_info.h" #include "dwarf/debug_frame_opcode_writer.h" #include "dwarf/dwarf_constants.h" #include "dwarf/headers.h" #include "elf/elf_builder.h" -namespace art { +namespace art HIDDEN { namespace debug { static constexpr bool kWriteDebugFrameHdr = false; @@ -88,6 +89,10 @@ static void WriteCIE(InstructionSet isa, /*inout*/ std::vector<uint8_t>* buffer) WriteCIE(is64bit, return_reg, opcodes, buffer); return; } + case InstructionSet::kRiscv64: { + UNIMPLEMENTED(FATAL); + return; + } case InstructionSet::kX86: { // FIXME: Add fp registers once libunwind adds support for them. Bug: 20491296 constexpr bool generate_opcodes_for_x86_fp = false; diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h index 986c7e820f..9915a24a5f 100644 --- a/compiler/debug/elf_debug_info_writer.h +++ b/compiler/debug/elf_debug_info_writer.h @@ -22,6 +22,7 @@ #include <vector> #include "art_field-inl.h" +#include "base/macros.h" #include "debug/elf_compilation_unit.h" #include "debug/elf_debug_loc_writer.h" #include "debug/method_debug_info.h" @@ -32,14 +33,14 @@ #include "dwarf/debug_info_entry_writer.h" #include "elf/elf_builder.h" #include "heap_poisoning.h" -#include "linear_alloc.h" +#include "linear_alloc-inl.h" #include "mirror/array.h" #include "mirror/class-inl.h" #include "mirror/class.h" #include "oat_file.h" #include "obj_ptr-inl.h" -namespace art { +namespace art HIDDEN { namespace debug { static std::vector<const char*> GetParamNames(const MethodDebugInfo* mi) { @@ -478,7 +479,9 @@ class ElfCompilationUnitWriter { if (methods_ptr == nullptr) { // Some types might have no methods. Allocate empty array instead. 
LinearAlloc* allocator = Runtime::Current()->GetLinearAlloc(); - void* storage = allocator->Alloc(Thread::Current(), sizeof(LengthPrefixedArray<ArtMethod>)); + void* storage = allocator->Alloc(Thread::Current(), + sizeof(LengthPrefixedArray<ArtMethod>), + LinearAllocKind::kNoGCRoots); methods_ptr = new (storage) LengthPrefixedArray<ArtMethod>(0); type->SetMethodsPtr(methods_ptr, 0, 0); DCHECK(type->GetMethodsPtr() != nullptr); diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h index 8d62747c66..4896bc1e9b 100644 --- a/compiler/debug/elf_debug_line_writer.h +++ b/compiler/debug/elf_debug_line_writer.h @@ -20,6 +20,7 @@ #include <unordered_set> #include <vector> +#include "base/macros.h" #include "debug/elf_compilation_unit.h" #include "debug/src_map_elem.h" #include "dex/dex_file-inl.h" @@ -29,7 +30,7 @@ #include "oat_file.h" #include "stack_map.h" -namespace art { +namespace art HIDDEN { namespace debug { using PositionInfos = std::vector<DexFile::PositionInfo>; @@ -73,6 +74,7 @@ class ElfDebugLineWriter { code_factor_bits_ = 2; // 32-bit instructions break; case InstructionSet::kNone: + case InstructionSet::kRiscv64: case InstructionSet::kX86: case InstructionSet::kX86_64: break; diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h index 37ab948119..8cf476ed2d 100644 --- a/compiler/debug/elf_debug_loc_writer.h +++ b/compiler/debug/elf_debug_loc_writer.h @@ -21,13 +21,13 @@ #include <map> #include "arch/instruction_set.h" -#include "compiled_method.h" +#include "base/macros.h" #include "debug/method_debug_info.h" #include "dwarf/debug_info_entry_writer.h" #include "dwarf/register.h" #include "stack_map.h" -namespace art { +namespace art HIDDEN { namespace debug { using Reg = dwarf::Reg; @@ -38,6 +38,8 @@ static Reg GetDwarfCoreReg(InstructionSet isa, int machine_reg) { return Reg::ArmCore(machine_reg); case InstructionSet::kArm64: return Reg::Arm64Core(machine_reg); + case InstructionSet::kRiscv64: + return Reg::Riscv64Core(machine_reg); case InstructionSet::kX86: return Reg::X86Core(machine_reg); case InstructionSet::kX86_64: @@ -55,6 +57,8 @@ static Reg GetDwarfFpReg(InstructionSet isa, int machine_reg) { return Reg::ArmFp(machine_reg); case InstructionSet::kArm64: return Reg::Arm64Fp(machine_reg); + case InstructionSet::kRiscv64: + return Reg::Riscv64Fp(machine_reg); case InstructionSet::kX86: return Reg::X86Fp(machine_reg); case InstructionSet::kX86_64: diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc index 765a81d4f8..8f64d73aa7 100644 --- a/compiler/debug/elf_debug_writer.cc +++ b/compiler/debug/elf_debug_writer.cc @@ -38,7 +38,7 @@ #include "oat.h" #include "stream/vector_output_stream.h" -namespace art { +namespace art HIDDEN { namespace debug { using ElfRuntimeTypes = std::conditional<sizeof(void*) == 4, ElfTypes32, ElfTypes64>::type; @@ -208,7 +208,8 @@ std::vector<uint8_t> MakeElfFileForJIT( using Reader = ElfDebugReader<ElfTypes>; Reader reader(buffer); reader.VisitFunctionSymbols([&](Elf_Sym sym, const char*) { - DCHECK_EQ(sym.st_value, method_info.code_address + CompiledMethod::CodeDelta(isa)); + DCHECK_EQ(sym.st_value, + method_info.code_address + GetInstructionSetEntryPointAdjustment(isa)); DCHECK_EQ(sym.st_size, method_info.code_size); num_syms++; }); diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h index 1ce3c6f6f8..72b028cb97 100644 --- a/compiler/debug/elf_debug_writer.h +++ b/compiler/debug/elf_debug_writer.h @@ -27,7 
+27,7 @@ #include "dwarf/dwarf_constants.h" #include "elf/elf_builder.h" -namespace art { +namespace art HIDDEN { class OatHeader; struct JITCodeEntry; namespace mirror { @@ -37,11 +37,11 @@ namespace debug { struct MethodDebugInfo; template <typename ElfTypes> -void WriteDebugInfo( +EXPORT void WriteDebugInfo( ElfBuilder<ElfTypes>* builder, const DebugInfo& debug_info); -std::vector<uint8_t> MakeMiniDebugInfo( +EXPORT std::vector<uint8_t> MakeMiniDebugInfo( InstructionSet isa, const InstructionSetFeatures* features, uint64_t text_section_address, diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h index 410f704582..fcd6696fa8 100644 --- a/compiler/debug/elf_symtab_writer.h +++ b/compiler/debug/elf_symtab_writer.h @@ -21,6 +21,7 @@ #include <unordered_set> #include <unordered_map> +#include "base/macros.h" #include "base/utils.h" #include "debug/debug_info.h" #include "debug/method_debug_info.h" @@ -29,7 +30,7 @@ #include "dex/dex_file-inl.h" #include "elf/elf_builder.h" -namespace art { +namespace art HIDDEN { namespace debug { // The ARM specification defines three special mapping symbols @@ -153,7 +154,7 @@ static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, uint64_t address = info.code_address; address += info.is_code_address_text_relative ? text->GetAddress() : 0; // Add in code delta, e.g., thumb bit 0 for Thumb2 code. - address += CompiledMethod::CodeDelta(info.isa); + address += GetInstructionSetEntryPointAdjustment(info.isa); symtab->Add(name_offset, text, address, info.code_size, STB_GLOBAL, STT_FUNC); } // Add symbols for dex files. diff --git a/compiler/debug/method_debug_info.h b/compiler/debug/method_debug_info.h index 152db6eaf0..b83c6e2052 100644 --- a/compiler/debug/method_debug_info.h +++ b/compiler/debug/method_debug_info.h @@ -21,9 +21,10 @@ #include "arch/instruction_set.h" #include "base/array_ref.h" +#include "base/macros.h" #include "dex/dex_file.h" -namespace art { +namespace art HIDDEN { namespace debug { struct MethodDebugInfo { diff --git a/compiler/debug/src_map_elem.h b/compiler/debug/src_map_elem.h index 5286b8c4dc..646a1f0fc7 100644 --- a/compiler/debug/src_map_elem.h +++ b/compiler/debug/src_map_elem.h @@ -19,7 +19,9 @@ #include <stdint.h> -namespace art { +#include "base/macros.h" + +namespace art HIDDEN { class SrcMapElem { public: diff --git a/compiler/debug/src_map_elem_test.cc b/compiler/debug/src_map_elem_test.cc index ceaa53fa99..bdbafd5b40 100644 --- a/compiler/debug/src_map_elem_test.cc +++ b/compiler/debug/src_map_elem_test.cc @@ -20,7 +20,7 @@ #include "base/macros.h" -namespace art { +namespace art HIDDEN { namespace debug { TEST(SrcMapElem, Operators) { diff --git a/compiler/dex/inline_method_analyser.cc b/compiler/dex/inline_method_analyser.cc index 32019657d3..381db3d21d 100644 --- a/compiler/dex/inline_method_analyser.cc +++ b/compiler/dex/inline_method_analyser.cc @@ -33,7 +33,7 @@ * only to allow the debugger to check whether a method has been inlined. */ -namespace art { +namespace art HIDDEN { namespace { // anonymous namespace diff --git a/compiler/dex/inline_method_analyser.h b/compiler/dex/inline_method_analyser.h index e1d652a642..99d07c6152 100644 --- a/compiler/dex/inline_method_analyser.h +++ b/compiler/dex/inline_method_analyser.h @@ -28,7 +28,7 @@ * only to allow the debugger to check whether a method has been inlined. 
*/ -namespace art { +namespace art HIDDEN { class CodeItemDataAccessor; diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc deleted file mode 100644 index b819d0effa..0000000000 --- a/compiler/dex/verification_results.cc +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "verification_results.h" - -#include <android-base/logging.h> - -#include "base/mutex-inl.h" -#include "base/stl_util.h" -#include "runtime.h" -#include "thread-current-inl.h" -#include "thread.h" - -namespace art { - -VerificationResults::VerificationResults() - : uncompilable_methods_lock_("compiler uncompilable methods lock"), - rejected_classes_lock_("compiler rejected classes lock") {} - -// Non-inline version of the destructor, as it does some implicit work not worth -// inlining. -VerificationResults::~VerificationResults() {} - -void VerificationResults::AddRejectedClass(ClassReference ref) { - { - WriterMutexLock mu(Thread::Current(), rejected_classes_lock_); - rejected_classes_.insert(ref); - } - DCHECK(IsClassRejected(ref)); -} - -bool VerificationResults::IsClassRejected(ClassReference ref) const { - ReaderMutexLock mu(Thread::Current(), rejected_classes_lock_); - return rejected_classes_.find(ref) != rejected_classes_.end(); -} - -void VerificationResults::AddUncompilableMethod(MethodReference ref) { - { - WriterMutexLock mu(Thread::Current(), uncompilable_methods_lock_); - uncompilable_methods_.insert(ref); - } - DCHECK(IsUncompilableMethod(ref)); -} - -bool VerificationResults::IsUncompilableMethod(MethodReference ref) const { - ReaderMutexLock mu(Thread::Current(), uncompilable_methods_lock_); - return uncompilable_methods_.find(ref) != uncompilable_methods_.end(); -} - - -} // namespace art diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h deleted file mode 100644 index b294ed3020..0000000000 --- a/compiler/dex/verification_results.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_DEX_VERIFICATION_RESULTS_H_ -#define ART_COMPILER_DEX_VERIFICATION_RESULTS_H_ - -#include <set> - -#include "base/macros.h" -#include "base/mutex.h" -#include "dex/class_reference.h" -#include "dex/method_reference.h" - -namespace art { - -namespace verifier { -class VerifierDepsTest; -} // namespace verifier - -// Used by CompilerCallbacks to track verification information from the Runtime. -class VerificationResults { - public: - VerificationResults(); - ~VerificationResults(); - - void AddRejectedClass(ClassReference ref) REQUIRES(!rejected_classes_lock_); - bool IsClassRejected(ClassReference ref) const REQUIRES(!rejected_classes_lock_); - - void AddUncompilableMethod(MethodReference ref) REQUIRES(!uncompilable_methods_lock_); - bool IsUncompilableMethod(MethodReference ref) const REQUIRES(!uncompilable_methods_lock_); - - private: - // TODO: External locking during CompilerDriver::PreCompile(), no locking during compilation. - mutable ReaderWriterMutex uncompilable_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - std::set<MethodReference> uncompilable_methods_ GUARDED_BY(uncompilable_methods_lock_); - - // Rejected classes. - // TODO: External locking during CompilerDriver::PreCompile(), no locking during compilation. - mutable ReaderWriterMutex rejected_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - std::set<ClassReference> rejected_classes_ GUARDED_BY(rejected_classes_lock_); - - friend class verifier::VerifierDepsTest; -}; - -} // namespace art - -#endif // ART_COMPILER_DEX_VERIFICATION_RESULTS_H_ diff --git a/compiler/driver/compiled_code_storage.h b/compiler/driver/compiled_code_storage.h new file mode 100644 index 0000000000..cef7398ec1 --- /dev/null +++ b/compiler/driver/compiled_code_storage.h @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DRIVER_COMPILED_CODE_STORAGE_H_ +#define ART_COMPILER_DRIVER_COMPILED_CODE_STORAGE_H_ + +#include <string> + +#include "base/array_ref.h" +#include "base/macros.h" + +namespace art HIDDEN { + +namespace linker { +class LinkerPatch; +} // namespace linker + +class CompiledMethod; +enum class InstructionSet; + +// Interface for storing AOT-compiled artifacts. +// These artifacts include compiled method code and related stack maps and +// linker patches as well as the compiled thunk code required for some kinds +// of linker patches. +// +// This interface is used for passing AOT-compiled code and metadata produced +// by the `libart-compiler` to `dex2oat`. The `CompiledMethod` created by +// `dex2oat` is completely opaque to the `libart-compiler`. 
+class CompiledCodeStorage { + public: + virtual CompiledMethod* CreateCompiledMethod(InstructionSet instruction_set, + ArrayRef<const uint8_t> code, + ArrayRef<const uint8_t> stack_map, + ArrayRef<const uint8_t> cfi, + ArrayRef<const linker::LinkerPatch> patches, + bool is_intrinsic) = 0; + + // TODO: Rewrite the interface for passing thunks to the `dex2oat` to reduce + // locking. The `OptimizingCompiler` is currently calling `GetThunkCode()` + // and locking a mutex there for every `LinkerPatch` that needs a thunk to + // check whether we need to compile it. Using a thunk compiler interface, + // we could drive this from the `dex2oat` side and lock the mutex at most + // once per `CreateCompiledMethod()` for any number of patches. + virtual ArrayRef<const uint8_t> GetThunkCode(const linker::LinkerPatch& patch, + /*out*/ std::string* debug_name = nullptr) = 0; + virtual void SetThunkCode(const linker::LinkerPatch& patch, + ArrayRef<const uint8_t> code, + const std::string& debug_name) = 0; + + protected: + CompiledCodeStorage() {} + ~CompiledCodeStorage() {} + + private: + DISALLOW_COPY_AND_ASSIGN(CompiledCodeStorage); +}; + +} // namespace art + +#endif // ART_COMPILER_DRIVER_COMPILED_CODE_STORAGE_H_ diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc deleted file mode 100644 index 4857ec0931..0000000000 --- a/compiler/driver/compiled_method_storage.cc +++ /dev/null @@ -1,288 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <algorithm> -#include <ostream> - -#include "compiled_method_storage.h" - -#include <android-base/logging.h> - -#include "base/data_hash.h" -#include "base/utils.h" -#include "compiled_method.h" -#include "linker/linker_patch.h" -#include "thread-current-inl.h" -#include "utils/dedupe_set-inl.h" -#include "utils/swap_space.h" - -namespace art { - -namespace { // anonymous namespace - -template <typename T> -const LengthPrefixedArray<T>* CopyArray(SwapSpace* swap_space, const ArrayRef<const T>& array) { - DCHECK(!array.empty()); - SwapAllocator<uint8_t> allocator(swap_space); - void* storage = allocator.allocate(LengthPrefixedArray<T>::ComputeSize(array.size())); - LengthPrefixedArray<T>* array_copy = new(storage) LengthPrefixedArray<T>(array.size()); - std::copy(array.begin(), array.end(), array_copy->begin()); - return array_copy; -} - -template <typename T> -void ReleaseArray(SwapSpace* swap_space, const LengthPrefixedArray<T>* array) { - SwapAllocator<uint8_t> allocator(swap_space); - size_t size = LengthPrefixedArray<T>::ComputeSize(array->size()); - array->~LengthPrefixedArray<T>(); - allocator.deallocate(const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(array)), size); -} - -} // anonymous namespace - -template <typename T, typename DedupeSetType> -inline const LengthPrefixedArray<T>* CompiledMethodStorage::AllocateOrDeduplicateArray( - const ArrayRef<const T>& data, - DedupeSetType* dedupe_set) { - if (data.empty()) { - return nullptr; - } else if (!DedupeEnabled()) { - return CopyArray(swap_space_.get(), data); - } else { - return dedupe_set->Add(Thread::Current(), data); - } -} - -template <typename T> -inline void CompiledMethodStorage::ReleaseArrayIfNotDeduplicated( - const LengthPrefixedArray<T>* array) { - if (array != nullptr && !DedupeEnabled()) { - ReleaseArray(swap_space_.get(), array); - } -} - -template <typename ContentType> -class CompiledMethodStorage::DedupeHashFunc { - private: - static constexpr bool kUseMurmur3Hash = true; - - public: - size_t operator()(const ArrayRef<ContentType>& array) const { - return DataHash()(array); - } -}; - -template <typename T> -class CompiledMethodStorage::LengthPrefixedArrayAlloc { - public: - explicit LengthPrefixedArrayAlloc(SwapSpace* swap_space) - : swap_space_(swap_space) { - } - - const LengthPrefixedArray<T>* Copy(const ArrayRef<const T>& array) { - return CopyArray(swap_space_, array); - } - - void Destroy(const LengthPrefixedArray<T>* array) { - ReleaseArray(swap_space_, array); - } - - private: - SwapSpace* const swap_space_; -}; - -class CompiledMethodStorage::ThunkMapKey { - public: - ThunkMapKey(linker::LinkerPatch::Type type, uint32_t custom_value1, uint32_t custom_value2) - : type_(type), custom_value1_(custom_value1), custom_value2_(custom_value2) {} - - bool operator<(const ThunkMapKey& other) const { - if (custom_value1_ != other.custom_value1_) { - return custom_value1_ < other.custom_value1_; - } - if (custom_value2_ != other.custom_value2_) { - return custom_value2_ < other.custom_value2_; - } - return type_ < other.type_; - } - - private: - linker::LinkerPatch::Type type_; - uint32_t custom_value1_; - uint32_t custom_value2_; -}; - -class CompiledMethodStorage::ThunkMapValue { - public: - ThunkMapValue(std::vector<uint8_t, SwapAllocator<uint8_t>>&& code, - const std::string& debug_name) - : code_(std::move(code)), debug_name_(debug_name) {} - - ArrayRef<const uint8_t> GetCode() const { - return ArrayRef<const uint8_t>(code_); - } - - const std::string& GetDebugName() const { - return 
debug_name_; - } - - private: - std::vector<uint8_t, SwapAllocator<uint8_t>> code_; - std::string debug_name_; -}; - -CompiledMethodStorage::CompiledMethodStorage(int swap_fd) - : swap_space_(swap_fd == -1 ? nullptr : new SwapSpace(swap_fd, 10 * MB)), - dedupe_enabled_(true), - dedupe_code_("dedupe code", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())), - dedupe_vmap_table_("dedupe vmap table", - LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())), - dedupe_cfi_info_("dedupe cfi info", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())), - dedupe_linker_patches_("dedupe cfi info", - LengthPrefixedArrayAlloc<linker::LinkerPatch>(swap_space_.get())), - thunk_map_lock_("thunk_map_lock"), - thunk_map_(std::less<ThunkMapKey>(), SwapAllocator<ThunkMapValueType>(swap_space_.get())) { -} - -CompiledMethodStorage::~CompiledMethodStorage() { - // All done by member destructors. -} - -void CompiledMethodStorage::DumpMemoryUsage(std::ostream& os, bool extended) const { - if (swap_space_.get() != nullptr) { - const size_t swap_size = swap_space_->GetSize(); - os << " swap=" << PrettySize(swap_size) << " (" << swap_size << "B)"; - } - if (extended) { - Thread* self = Thread::Current(); - os << "\nCode dedupe: " << dedupe_code_.DumpStats(self); - os << "\nVmap table dedupe: " << dedupe_vmap_table_.DumpStats(self); - os << "\nCFI info dedupe: " << dedupe_cfi_info_.DumpStats(self); - } -} - -const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateCode( - const ArrayRef<const uint8_t>& code) { - return AllocateOrDeduplicateArray(code, &dedupe_code_); -} - -void CompiledMethodStorage::ReleaseCode(const LengthPrefixedArray<uint8_t>* code) { - ReleaseArrayIfNotDeduplicated(code); -} - -size_t CompiledMethodStorage::UniqueCodeEntries() const { - DCHECK(DedupeEnabled()); - return dedupe_code_.Size(Thread::Current()); -} - -const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateVMapTable( - const ArrayRef<const uint8_t>& table) { - return AllocateOrDeduplicateArray(table, &dedupe_vmap_table_); -} - -void CompiledMethodStorage::ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table) { - ReleaseArrayIfNotDeduplicated(table); -} - -size_t CompiledMethodStorage::UniqueVMapTableEntries() const { - DCHECK(DedupeEnabled()); - return dedupe_vmap_table_.Size(Thread::Current()); -} - -const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateCFIInfo( - const ArrayRef<const uint8_t>& cfi_info) { - return AllocateOrDeduplicateArray(cfi_info, &dedupe_cfi_info_); -} - -void CompiledMethodStorage::ReleaseCFIInfo(const LengthPrefixedArray<uint8_t>* cfi_info) { - ReleaseArrayIfNotDeduplicated(cfi_info); -} - -size_t CompiledMethodStorage::UniqueCFIInfoEntries() const { - DCHECK(DedupeEnabled()); - return dedupe_cfi_info_.Size(Thread::Current()); -} - -const LengthPrefixedArray<linker::LinkerPatch>* CompiledMethodStorage::DeduplicateLinkerPatches( - const ArrayRef<const linker::LinkerPatch>& linker_patches) { - return AllocateOrDeduplicateArray(linker_patches, &dedupe_linker_patches_); -} - -void CompiledMethodStorage::ReleaseLinkerPatches( - const LengthPrefixedArray<linker::LinkerPatch>* linker_patches) { - ReleaseArrayIfNotDeduplicated(linker_patches); -} - -size_t CompiledMethodStorage::UniqueLinkerPatchesEntries() const { - DCHECK(DedupeEnabled()); - return dedupe_linker_patches_.Size(Thread::Current()); -} - -CompiledMethodStorage::ThunkMapKey CompiledMethodStorage::GetThunkMapKey( - const linker::LinkerPatch& linker_patch) { - uint32_t custom_value1 = 0u; - uint32_t 
custom_value2 = 0u; - switch (linker_patch.GetType()) { - case linker::LinkerPatch::Type::kCallEntrypoint: - custom_value1 = linker_patch.EntrypointOffset(); - break; - case linker::LinkerPatch::Type::kBakerReadBarrierBranch: - custom_value1 = linker_patch.GetBakerCustomValue1(); - custom_value2 = linker_patch.GetBakerCustomValue2(); - break; - case linker::LinkerPatch::Type::kCallRelative: - // No custom values. - break; - default: - LOG(FATAL) << "Unexpected patch type: " << linker_patch.GetType(); - UNREACHABLE(); - } - return ThunkMapKey(linker_patch.GetType(), custom_value1, custom_value2); -} - -ArrayRef<const uint8_t> CompiledMethodStorage::GetThunkCode(const linker::LinkerPatch& linker_patch, - /*out*/ std::string* debug_name) { - ThunkMapKey key = GetThunkMapKey(linker_patch); - MutexLock lock(Thread::Current(), thunk_map_lock_); - auto it = thunk_map_.find(key); - if (it != thunk_map_.end()) { - const ThunkMapValue& value = it->second; - if (debug_name != nullptr) { - *debug_name = value.GetDebugName(); - } - return value.GetCode(); - } else { - if (debug_name != nullptr) { - *debug_name = std::string(); - } - return ArrayRef<const uint8_t>(); - } -} - -void CompiledMethodStorage::SetThunkCode(const linker::LinkerPatch& linker_patch, - ArrayRef<const uint8_t> code, - const std::string& debug_name) { - DCHECK(!code.empty()); - ThunkMapKey key = GetThunkMapKey(linker_patch); - std::vector<uint8_t, SwapAllocator<uint8_t>> code_copy( - code.begin(), code.end(), SwapAllocator<uint8_t>(swap_space_.get())); - ThunkMapValue value(std::move(code_copy), debug_name); - MutexLock lock(Thread::Current(), thunk_map_lock_); - // Note: Multiple threads can try and compile the same thunk, so this may not create a new entry. - thunk_map_.emplace(key, std::move(value)); -} - -} // namespace art diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h deleted file mode 100644 index f9f34017eb..0000000000 --- a/compiler/driver/compiled_method_storage.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
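The removed `SetThunkCode()` above relied on `std::map::emplace()` leaving an existing key untouched, which is what made concurrent publication of the same thunk harmless ("this may not create a new entry"). A tiny standalone illustration of that property, standard library only, nothing ART-specific:

#include <cassert>
#include <map>
#include <string>

int main() {
  std::map<int, std::string> thunks;
  thunks.emplace(42, "first");
  // A second racer emplacing the same key does not overwrite: the map keeps the
  // first thunk published for that key, as the removed comment noted.
  auto [it, inserted] = thunks.emplace(42, "second");
  assert(!inserted);
  assert(it->second == "first");
  return 0;
}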
- */ - -#ifndef ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_ -#define ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_ - -#include <iosfwd> -#include <map> -#include <memory> - -#include "base/array_ref.h" -#include "base/length_prefixed_array.h" -#include "base/macros.h" -#include "utils/dedupe_set.h" -#include "utils/swap_space.h" - -namespace art { - -namespace linker { -class LinkerPatch; -} // namespace linker - -class CompiledMethodStorage { - public: - explicit CompiledMethodStorage(int swap_fd); - ~CompiledMethodStorage(); - - void DumpMemoryUsage(std::ostream& os, bool extended) const; - - void SetDedupeEnabled(bool dedupe_enabled) { - dedupe_enabled_ = dedupe_enabled; - } - bool DedupeEnabled() const { - return dedupe_enabled_; - } - - SwapAllocator<void> GetSwapSpaceAllocator() { - return SwapAllocator<void>(swap_space_.get()); - } - - const LengthPrefixedArray<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code); - void ReleaseCode(const LengthPrefixedArray<uint8_t>* code); - size_t UniqueCodeEntries() const; - - const LengthPrefixedArray<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& table); - void ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table); - size_t UniqueVMapTableEntries() const; - - const LengthPrefixedArray<uint8_t>* DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info); - void ReleaseCFIInfo(const LengthPrefixedArray<uint8_t>* cfi_info); - size_t UniqueCFIInfoEntries() const; - - const LengthPrefixedArray<linker::LinkerPatch>* DeduplicateLinkerPatches( - const ArrayRef<const linker::LinkerPatch>& linker_patches); - void ReleaseLinkerPatches(const LengthPrefixedArray<linker::LinkerPatch>* linker_patches); - size_t UniqueLinkerPatchesEntries() const; - - // Returns the code associated with the given patch. - // If the code has not been set, returns empty data. - // If `debug_name` is not null, stores the associated debug name in `*debug_name`. - ArrayRef<const uint8_t> GetThunkCode(const linker::LinkerPatch& linker_patch, - /*out*/ std::string* debug_name = nullptr); - - // Sets the code and debug name associated with the given patch. - void SetThunkCode(const linker::LinkerPatch& linker_patch, - ArrayRef<const uint8_t> code, - const std::string& debug_name); - - private: - class ThunkMapKey; - class ThunkMapValue; - using ThunkMapValueType = std::pair<const ThunkMapKey, ThunkMapValue>; - using ThunkMap = std::map<ThunkMapKey, - ThunkMapValue, - std::less<ThunkMapKey>, - SwapAllocator<ThunkMapValueType>>; - static_assert(std::is_same<ThunkMapValueType, ThunkMap::value_type>::value, "Value type check."); - - static ThunkMapKey GetThunkMapKey(const linker::LinkerPatch& linker_patch); - - template <typename T, typename DedupeSetType> - const LengthPrefixedArray<T>* AllocateOrDeduplicateArray(const ArrayRef<const T>& data, - DedupeSetType* dedupe_set); - - template <typename T> - void ReleaseArrayIfNotDeduplicated(const LengthPrefixedArray<T>* array); - - // DeDuplication data structures. - template <typename ContentType> - class DedupeHashFunc; - - template <typename T> - class LengthPrefixedArrayAlloc; - - template <typename T> - using ArrayDedupeSet = DedupeSet<ArrayRef<const T>, - LengthPrefixedArray<T>, - LengthPrefixedArrayAlloc<T>, - size_t, - DedupeHashFunc<const T>, - 4>; - - // Swap pool and allocator used for native allocations. May be file-backed. Needs to be first - // as other fields rely on this. 
- std::unique_ptr<SwapSpace> swap_space_; - - bool dedupe_enabled_; - - ArrayDedupeSet<uint8_t> dedupe_code_; - ArrayDedupeSet<uint8_t> dedupe_vmap_table_; - ArrayDedupeSet<uint8_t> dedupe_cfi_info_; - ArrayDedupeSet<linker::LinkerPatch> dedupe_linker_patches_; - - Mutex thunk_map_lock_; - ThunkMap thunk_map_ GUARDED_BY(thunk_map_lock_); - - DISALLOW_COPY_AND_ASSIGN(CompiledMethodStorage); -}; - -} // namespace art - -#endif // ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_ diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc deleted file mode 100644 index 05eacd848d..0000000000 --- a/compiler/driver/compiled_method_storage_test.cc +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "compiled_method_storage.h" - -#include <gtest/gtest.h> - -#include "compiled_method-inl.h" - -namespace art { - -TEST(CompiledMethodStorage, Deduplicate) { - CompiledMethodStorage storage(/* swap_fd= */ -1); - - ASSERT_TRUE(storage.DedupeEnabled()); // The default. - - const uint8_t raw_code1[] = { 1u, 2u, 3u }; - const uint8_t raw_code2[] = { 4u, 3u, 2u, 1u }; - ArrayRef<const uint8_t> code[] = { - ArrayRef<const uint8_t>(raw_code1), - ArrayRef<const uint8_t>(raw_code2), - }; - const uint8_t raw_vmap_table1[] = { 2, 4, 6 }; - const uint8_t raw_vmap_table2[] = { 7, 5, 3, 1 }; - ArrayRef<const uint8_t> vmap_table[] = { - ArrayRef<const uint8_t>(raw_vmap_table1), - ArrayRef<const uint8_t>(raw_vmap_table2), - }; - const uint8_t raw_cfi_info1[] = { 1, 3, 5 }; - const uint8_t raw_cfi_info2[] = { 8, 6, 4, 2 }; - ArrayRef<const uint8_t> cfi_info[] = { - ArrayRef<const uint8_t>(raw_cfi_info1), - ArrayRef<const uint8_t>(raw_cfi_info2), - }; - const linker::LinkerPatch raw_patches1[] = { - linker::LinkerPatch::IntrinsicReferencePatch(0u, 0u, 0u), - linker::LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 1u), - }; - const linker::LinkerPatch raw_patches2[] = { - linker::LinkerPatch::IntrinsicReferencePatch(0u, 0u, 0u), - linker::LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 2u), - }; - ArrayRef<const linker::LinkerPatch> patches[] = { - ArrayRef<const linker::LinkerPatch>(raw_patches1), - ArrayRef<const linker::LinkerPatch>(raw_patches2), - }; - - std::vector<CompiledMethod*> compiled_methods; - compiled_methods.reserve(1u << 4); - for (auto&& c : code) { - for (auto&& v : vmap_table) { - for (auto&& f : cfi_info) { - for (auto&& p : patches) { - compiled_methods.push_back(CompiledMethod::SwapAllocCompiledMethod( - &storage, InstructionSet::kNone, c, v, f, p)); - } - } - } - } - constexpr size_t code_bit = 1u << 3; - constexpr size_t vmap_table_bit = 1u << 2; - constexpr size_t cfi_info_bit = 1u << 1; - constexpr size_t patches_bit = 1u << 0; - CHECK_EQ(compiled_methods.size(), 1u << 4); - for (size_t i = 0; i != compiled_methods.size(); ++i) { - for (size_t j = 0; j != compiled_methods.size(); ++j) { - CompiledMethod* lhs = compiled_methods[i]; - 
CompiledMethod* rhs = compiled_methods[j]; - bool same_code = ((i ^ j) & code_bit) == 0u; - bool same_vmap_table = ((i ^ j) & vmap_table_bit) == 0u; - bool same_cfi_info = ((i ^ j) & cfi_info_bit) == 0u; - bool same_patches = ((i ^ j) & patches_bit) == 0u; - ASSERT_EQ(same_code, lhs->GetQuickCode().data() == rhs->GetQuickCode().data()) - << i << " " << j; - ASSERT_EQ(same_vmap_table, lhs->GetVmapTable().data() == rhs->GetVmapTable().data()) - << i << " " << j; - ASSERT_EQ(same_cfi_info, lhs->GetCFIInfo().data() == rhs->GetCFIInfo().data()) - << i << " " << j; - ASSERT_EQ(same_patches, lhs->GetPatches().data() == rhs->GetPatches().data()) - << i << " " << j; - } - } - for (CompiledMethod* method : compiled_methods) { - CompiledMethod::ReleaseSwapAllocatedCompiledMethod(&storage, method); - } -} - -} // namespace art diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index 51cd999b6d..603596f3bc 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -23,6 +23,7 @@ #include "arch/instruction_set.h" #include "arch/instruction_set_features.h" +#include "art_method-inl.h" #include "base/runtime_debug.h" #include "base/string_view_cpp20.h" #include "base/variant_map.h" @@ -30,12 +31,11 @@ #include "cmdline_parser.h" #include "compiler_options_map-inl.h" #include "dex/dex_file-inl.h" -#include "dex/verification_results.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" #include "simple_compiler_options_map.h" -namespace art { +namespace art HIDDEN { CompilerOptions::CompilerOptions() : compiler_filter_(CompilerFilter::kDefaultCompilerFilter), @@ -48,7 +48,6 @@ CompilerOptions::CompilerOptions() no_inline_from_(), dex_files_for_oat_file_(), image_classes_(), - verification_results_(nullptr), compiler_type_(CompilerType::kAotCompiler), image_type_(ImageType::kNone), multi_image_(false), @@ -146,14 +145,34 @@ bool CompilerOptions::ParseCompilerOptions(const std::vector<std::string>& optio bool CompilerOptions::IsImageClass(const char* descriptor) const { // Historical note: We used to hold the set indirectly and there was a distinction between an - // empty set and a null, null meaning to include all classes. However, the distiction has been + // empty set and a null, null meaning to include all classes. However, the distinction has been // removed; if we don't have a profile, we treat it as an empty set of classes. b/77340429 return image_classes_.find(std::string_view(descriptor)) != image_classes_.end(); } -const VerificationResults* CompilerOptions::GetVerificationResults() const { - DCHECK(Runtime::Current()->IsAotCompiler()); - return verification_results_; +bool CompilerOptions::IsPreloadedClass(const char* pretty_descriptor) const { + return preloaded_classes_.find(std::string_view(pretty_descriptor)) != preloaded_classes_.end(); +} + +bool CompilerOptions::ShouldCompileWithClinitCheck(ArtMethod* method) const { + if (method != nullptr && + Runtime::Current()->IsAotCompiler() && + method->IsStatic() && + !method->IsConstructor() && + // Compiled code for native methods never do a clinit check, so we may put the resolution + // trampoline for native methods. This means that it's possible post zygote fork for the + // entry to be dirtied. We could resolve this by either: + // - Make these methods use the generic JNI entrypoint, but that's not + // desirable for a method that is in the profile. + // - Ensure the declaring class of such native methods are always in the + // preloaded-classes list. 
+ // - Emit the clinit check in the compiled code of native methods. + !method->IsNative()) { + ScopedObjectAccess soa(Thread::Current()); + ObjPtr<mirror::Class> cls = method->GetDeclaringClass<kWithoutReadBarrier>(); + return cls->IsInBootImageAndNotInPreloadedClasses(); + } + return false; } } // namespace art diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index 1bffdb11ed..c8a41ce24b 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -30,7 +30,7 @@ #include "base/utils.h" #include "optimizing/register_allocator.h" -namespace art { +namespace art HIDDEN { namespace jit { class JitCompiler; @@ -44,11 +44,11 @@ namespace linker { class Arm64RelativePatcherTest; } // namespace linker +class ArtMethod; class DexFile; enum class InstructionSet; class InstructionSetFeatures; class ProfileCompilationInfo; -class VerificationResults; // Enum for CheckProfileMethodsCompiled. Outside CompilerOptions so it can be forward-declared. enum class ProfileMethodsCheck : uint8_t { @@ -83,8 +83,8 @@ class CompilerOptions final { kAppImage, // Creating app image. }; - CompilerOptions(); - ~CompilerOptions(); + EXPORT CompilerOptions(); + EXPORT ~CompilerOptions(); CompilerFilter::Filter GetCompilerFilter() const { return compiler_filter_; @@ -114,12 +114,10 @@ class CompilerOptions final { return compiler_filter_ == CompilerFilter::kAssumeVerified; } - bool VerifyAtRuntime() const { - return compiler_filter_ == CompilerFilter::kExtract; - } - bool IsAnyCompilationEnabled() const { - return CompilerFilter::IsAnyCompilationEnabled(compiler_filter_); + return CompilerFilter::IsAnyCompilationEnabled(compiler_filter_) && + // TODO(riscv64): remove this when we have compiler support for RISC-V + GetInstructionSet() != InstructionSet::kRiscv64; } size_t GetHugeMethodThreshold() const { @@ -298,9 +296,11 @@ class CompilerOptions final { return image_classes_; } - bool IsImageClass(const char* descriptor) const; + EXPORT bool IsImageClass(const char* descriptor) const; - const VerificationResults* GetVerificationResults() const; + // Returns whether the given `pretty_descriptor` is in the list of preloaded + // classes. `pretty_descriptor` should be the result of calling `PrettyDescriptor`. + EXPORT bool IsPreloadedClass(const char* pretty_descriptor) const; bool ParseCompilerOptions(const std::vector<std::string>& options, bool ignore_unrecognized, @@ -383,9 +383,15 @@ class CompilerOptions final { return ContainsElement(GetDexFilesForOatFile(), dex_file); } + // If this is a static non-constructor method in the boot classpath, and its class isn't + // initialized at compile-time, or won't be initialized by the zygote, add + // initialization checks at entry. This will avoid the need of trampolines + // which at runtime we will need to dirty after initialization. + EXPORT bool ShouldCompileWithClinitCheck(ArtMethod* method) const; + private: - bool ParseDumpInitFailures(const std::string& option, std::string* error_msg); - bool ParseRegisterAllocationStrategy(const std::string& option, std::string* error_msg); + EXPORT bool ParseDumpInitFailures(const std::string& option, std::string* error_msg); + EXPORT bool ParseRegisterAllocationStrategy(const std::string& option, std::string* error_msg); CompilerFilter::Filter compiler_filter_; size_t huge_method_threshold_; @@ -408,8 +414,9 @@ class CompilerOptions final { // Must not be empty for real boot image, only for tests pretending to compile boot image. 
HashSet<std::string> image_classes_; - // Results of AOT verification. - const VerificationResults* verification_results_; + // Classes listed in the preloaded-classes file, used for boot image and + // boot image extension compilation. + HashSet<std::string> preloaded_classes_; CompilerType compiler_type_; ImageType image_type_; diff --git a/compiler/driver/compiler_options_map-inl.h b/compiler/driver/compiler_options_map-inl.h index fcbc0f2f5c..79a59625f5 100644 --- a/compiler/driver/compiler_options_map-inl.h +++ b/compiler/driver/compiler_options_map-inl.h @@ -29,7 +29,7 @@ #include "cmdline_parser.h" #include "compiler_options.h" -namespace art { +namespace art HIDDEN { template <> struct CmdlineType<CompilerFilter::Filter> : CmdlineTypeParser<CompilerFilter::Filter> { @@ -118,6 +118,7 @@ inline bool ReadCompilerOptions(Base& map, CompilerOptions* options, std::string template <typename Map, typename Builder> inline void AddCompilerOptionsArgumentParserOptions(Builder& b) { + // clang-format off b. Define("--compiler-filter=_") .template WithType<CompilerFilter::Filter>() @@ -256,6 +257,7 @@ inline void AddCompilerOptionsArgumentParserOptions(Builder& b) { .template WithType<unsigned int>() .WithHelp("Maximum solid block size for compressed images.") .IntoKey(Map::MaxImageBlockSize); + // clang-format on } #pragma GCC diagnostic pop diff --git a/compiler/driver/compiler_options_map.h b/compiler/driver/compiler_options_map.h index 7e2f8466e0..b2dd57d00e 100644 --- a/compiler/driver/compiler_options_map.h +++ b/compiler/driver/compiler_options_map.h @@ -21,10 +21,11 @@ #include <vector> #include "base/compiler_filter.h" +#include "base/macros.h" #include "base/variant_map.h" #include "cmdline_types.h" -namespace art { +namespace art HIDDEN { enum class ProfileMethodsCheck : uint8_t; diff --git a/compiler/driver/dex_compilation_unit.cc b/compiler/driver/dex_compilation_unit.cc index 0d0f074917..ccebfa9c07 100644 --- a/compiler/driver/dex_compilation_unit.cc +++ b/compiler/driver/dex_compilation_unit.cc @@ -25,7 +25,7 @@ #include "mirror/dex_cache.h" #include "scoped_thread_state_change-inl.h" -namespace art { +namespace art HIDDEN { DexCompilationUnit::DexCompilationUnit(Handle<mirror::ClassLoader> class_loader, ClassLinker* class_linker, diff --git a/compiler/driver/dex_compilation_unit.h b/compiler/driver/dex_compilation_unit.h index def90fa4e1..d595c0a4b2 100644 --- a/compiler/driver/dex_compilation_unit.h +++ b/compiler/driver/dex_compilation_unit.h @@ -20,11 +20,12 @@ #include <stdint.h> #include "base/arena_object.h" +#include "base/macros.h" #include "dex/code_item_accessors.h" #include "dex/dex_file.h" #include "handle.h" -namespace art { +namespace art HIDDEN { namespace mirror { class Class; class ClassLoader; diff --git a/compiler/driver/simple_compiler_options_map.h b/compiler/driver/simple_compiler_options_map.h index e7a51a4995..6663c0c509 100644 --- a/compiler/driver/simple_compiler_options_map.h +++ b/compiler/driver/simple_compiler_options_map.h @@ -23,9 +23,10 @@ #include <memory> #include "compiler_options_map-inl.h" +#include "base/macros.h" #include "base/variant_map.h" -namespace art { +namespace art HIDDEN { template <typename TValue> struct SimpleParseArgumentMapKey : VariantMapKey<TValue> { diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc index 495398b4b3..82c4998217 100644 --- a/compiler/exception_test.cc +++ b/compiler/exception_test.cc @@ -14,6 +14,8 @@ * limitations under the License. 
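The new `ShouldCompileWithClinitCheck()` in the `compiler_options.cc` hunk above reduces to a small predicate over the method and its declaring class. A condensed restatement with the runtime queries hoisted into parameters; the helper name is illustrative only, the real check lives in `CompilerOptions`:

// Sketch: mirrors the conditions in ShouldCompileWithClinitCheck() above.
bool NeedsClinitCheckAtEntry(bool is_aot_compiler,
                             bool is_static,
                             bool is_constructor,
                             bool is_native,
                             bool in_boot_image_and_not_preloaded) {
  // Only AOT-compiled static, non-constructor, non-native methods qualify; for
  // those, emit the check when the declaring class is in the boot image but is
  // not on the preloaded-classes list, i.e. the zygote will not initialize it.
  return is_aot_compiler && is_static && !is_constructor && !is_native &&
         in_boot_image_and_not_preloaded;
}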
*/ +#include <android-base/test_utils.h> + #include <memory> #include <type_traits> @@ -22,6 +24,7 @@ #include "base/callee_save_type.h" #include "base/enums.h" #include "base/leb128.h" +#include "base/macros.h" #include "base/malloc_arena_pool.h" #include "class_linker.h" #include "common_runtime_test.h" @@ -42,7 +45,7 @@ #include "scoped_thread_state_change-inl.h" #include "thread.h" -namespace art { +namespace art HIDDEN { class ExceptionTest : public CommonRuntimeTest { protected: @@ -78,7 +81,12 @@ class ExceptionTest : public CommonRuntimeTest { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stack_maps(&allocator, kRuntimeISA); - stack_maps.BeginMethod(4 * sizeof(void*), 0u, 0u, 0u); + stack_maps.BeginMethod(/* frame_size_in_bytes= */ 4 * sizeof(void*), + /* core_spill_mask= */ 0u, + /* fp_spill_mask= */ 0u, + /* num_dex_registers= */ 0u, + /* baseline= */ false, + /* debuggable= */ false); stack_maps.BeginStackMapEntry(kDexPc, native_pc_offset); stack_maps.EndStackMapEntry(); stack_maps.EndMethod(code_size); @@ -86,7 +94,7 @@ class ExceptionTest : public CommonRuntimeTest { const size_t stack_maps_size = stack_map.size(); const size_t header_size = sizeof(OatQuickMethodHeader); - const size_t code_alignment = GetInstructionSetAlignment(kRuntimeISA); + const size_t code_alignment = GetInstructionSetCodeAlignment(kRuntimeISA); fake_header_code_and_maps_.resize(stack_maps_size + header_size + code_size + code_alignment); // NB: The start of the vector might not have been allocated the desired alignment. @@ -187,15 +195,24 @@ TEST_F(ExceptionTest, StackTraceElement) { fake_stack.push_back(0); } - fake_stack.push_back(method_g_->GetOatQuickMethodHeader(0)->ToNativeQuickPc( - method_g_, kDexPc, /* is_for_catch_handler= */ false)); // return pc + OatQuickMethodHeader* header = OatQuickMethodHeader::FromEntryPoint( + method_g_->GetEntryPointFromQuickCompiledCode()); + // Untag native pc when running with hwasan since the pcs on the stack aren't tagged and we use + // this to create a fake stack. See OatQuickMethodHeader::Contains where we untag code pointers + // before comparing it with the PC from the stack. + uintptr_t native_pc = header->ToNativeQuickPc(method_g_, kDexPc); + if (running_with_hwasan()) { + // TODO(228989263): Use HWASanUntag once we have a hwasan target for tests too. HWASanUntag + // uses static checks which won't work if we don't have a dedicated target. + native_pc = (native_pc & ((1ULL << 56) - 1)); + } + fake_stack.push_back(native_pc); // return pc // Create/push fake 16byte stack frame for method g fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_)); fake_stack.push_back(0); fake_stack.push_back(0); - fake_stack.push_back(method_g_->GetOatQuickMethodHeader(0)->ToNativeQuickPc( - method_g_, kDexPc, /* is_for_catch_handler= */ false)); // return pc + fake_stack.push_back(native_pc); // return pc. 
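The hwasan handling added above strips the tag from the native pc before pushing it onto the fake stack. A standalone illustration of the masking, under the assumption stated in the test's comment that HWASan keeps its tag in the top byte of a 64-bit pointer:

#include <cassert>
#include <cstdint>

// (1ULL << 56) - 1 keeps bits [55:0] and clears the top byte where HWASan
// stores its pointer tag, matching the expression used in the test above.
constexpr uint64_t UntagPointer(uint64_t tagged) {
  return tagged & ((UINT64_C(1) << 56) - 1);
}

int main() {
  uint64_t untagged = UINT64_C(0x00007123456789ab);
  uint64_t tagged = (UINT64_C(0x2f) << 56) | untagged;
  assert(UntagPointer(tagged) == untagged);
  return 0;
}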
// Create/push fake 16byte stack frame for method f fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_)); diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index 7002636d4e..e67236769e 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -34,13 +34,17 @@ #include "jit/jit_code_cache.h" #include "jit/jit_logger.h" -namespace art { +namespace art HIDDEN { namespace jit { JitCompiler* JitCompiler::Create() { return new JitCompiler(); } +void JitCompiler::SetDebuggableCompilerOption(bool value) { + compiler_options_->SetDebuggable(value); +} + void JitCompiler::ParseCompilerOptions() { // Special case max code units for inlining, whose default is "unset" (implictly // meaning no limit). Do this before parsing the actual passed options. @@ -85,7 +89,7 @@ void JitCompiler::ParseCompilerOptions() { if (StartsWith(option, "--instruction-set-variant=")) { const char* str = option.c_str() + strlen("--instruction-set-variant="); VLOG(compiler) << "JIT instruction set variant " << str; - instruction_set_features = InstructionSetFeatures::FromVariant( + instruction_set_features = InstructionSetFeatures::FromVariantAndHwcap( instruction_set, str, &error_msg); if (instruction_set_features == nullptr) { LOG(WARNING) << "Error parsing " << option << " message=" << error_msg; @@ -121,7 +125,7 @@ void JitCompiler::ParseCompilerOptions() { } } -extern "C" JitCompilerInterface* jit_load() { +EXPORT extern "C" JitCompilerInterface* jit_load() { VLOG(jit) << "Create jit compiler"; auto* const jit_compiler = JitCompiler::Create(); CHECK(jit_compiler != nullptr); @@ -199,6 +203,8 @@ bool JitCompiler::CompileMethod( VLOG(jit) << "Compilation of " << method->PrettyMethod() << " took " << PrettyDuration(UsToNs(duration_us)); runtime->GetMetrics()->JitMethodCompileCount()->AddOne(); + runtime->GetMetrics()->JitMethodCompileTotalTimeDelta()->Add(duration_us); + runtime->GetMetrics()->JitMethodCompileCountDelta()->AddOne(); } // Trim maps to reduce memory usage. 
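The two metric calls added to `JitCompiler::CompileMethod()` above record, per successful compilation, both an elapsed-time delta and a count delta. A small sketch of why both are kept: together they let a reporter derive an average compile time per reporting window. The names and the `FlushDeltas()` step are assumptions about how "Delta" metrics are consumed, not something shown in this hunk.

#include <cstdint>

// Sketch only; illustrative names, not the ART metrics API.
struct JitCompileWindowStats {
  uint64_t compile_count_delta = 0;
  uint64_t compile_time_us_delta = 0;

  void RecordCompile(uint64_t duration_us) {
    compile_count_delta += 1;              // JitMethodCompileCountDelta()->AddOne()
    compile_time_us_delta += duration_us;  // JitMethodCompileTotalTimeDelta()->Add(...)
  }

  // Average compile time over the window, derivable because both deltas are kept.
  double AverageCompileTimeUs() const {
    return compile_count_delta == 0
        ? 0.0
        : static_cast<double>(compile_time_us_delta) / compile_count_delta;
  }

  // Assumed reporter behavior: deltas start over for the next window.
  void FlushDeltas() { compile_count_delta = 0; compile_time_us_delta = 0; }
};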
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h index 8e9966db0e..5a919fb612 100644 --- a/compiler/jit/jit_compiler.h +++ b/compiler/jit/jit_compiler.h @@ -17,12 +17,13 @@ #ifndef ART_COMPILER_JIT_JIT_COMPILER_H_ #define ART_COMPILER_JIT_JIT_COMPILER_H_ +#include "base/macros.h" #include "base/mutex.h" #include "compilation_kind.h" #include "jit/jit.h" -namespace art { +namespace art HIDDEN { class ArtMethod; class Compiler; @@ -50,6 +51,8 @@ class JitCompiler : public JitCompilerInterface { bool IsBaselineCompiler() const override; + void SetDebuggableCompilerOption(bool val) override; + bool GenerateDebugInfo() override; void ParseCompilerOptions() override; diff --git a/compiler/jit/jit_logger.cc b/compiler/jit/jit_logger.cc index 6b9453f525..32845260f3 100644 --- a/compiler/jit/jit_logger.cc +++ b/compiler/jit/jit_logger.cc @@ -24,7 +24,7 @@ #include "jit/jit_code_cache.h" #include "oat_file-inl.h" -namespace art { +namespace art HIDDEN { namespace jit { #ifdef ART_TARGET_ANDROID diff --git a/compiler/jit/jit_logger.h b/compiler/jit/jit_logger.h index f4ef75a5fe..9d1f3073fa 100644 --- a/compiler/jit/jit_logger.h +++ b/compiler/jit/jit_logger.h @@ -19,11 +19,11 @@ #include <memory> +#include "base/macros.h" #include "base/mutex.h" #include "base/os.h" -#include "compiled_method.h" -namespace art { +namespace art HIDDEN { class ArtMethod; diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc index 9e3bb86fb1..70cf2d4eb0 100644 --- a/compiler/jni/jni_cfi_test.cc +++ b/compiler/jni/jni_cfi_test.cc @@ -20,6 +20,7 @@ #include "arch/instruction_set.h" #include "base/arena_allocator.h" #include "base/enums.h" +#include "base/macros.h" #include "base/malloc_arena_pool.h" #include "cfi_test.h" #include "gtest/gtest.h" @@ -30,7 +31,7 @@ #include "jni/jni_cfi_test_expected.inc" -namespace art { +namespace art HIDDEN { // Run the tests only on host. #ifndef ART_TARGET_ANDROID @@ -124,22 +125,31 @@ class JNICFITest : public CFITest { TestImpl(InstructionSet::isa, #isa, expected_asm, expected_cfi); \ } +// We can't use compile-time macros for read-barrier as the introduction +// of userfaultfd-GC has made it a runtime choice. +#define TEST_ISA_ONLY_CC(isa) \ + TEST_F(JNICFITest, isa) { \ + if (kUseBakerReadBarrier && gUseReadBarrier) { \ + std::vector<uint8_t> expected_asm(expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(InstructionSet::isa, #isa, expected_asm, expected_cfi); \ + } \ + } + #ifdef ART_ENABLE_CODEGEN_arm // Run the tests for ARM only with Baker read barriers, as the // expected generated code contains a Marking Register refresh // instruction. -#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) -TEST_ISA(kThumb2) -#endif +TEST_ISA_ONLY_CC(kThumb2) #endif #ifdef ART_ENABLE_CODEGEN_arm64 // Run the tests for ARM64 only with Baker read barriers, as the // expected generated code contains a Marking Register refresh // instruction. 
-#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) -TEST_ISA(kArm64) -#endif +TEST_ISA_ONLY_CC(kArm64) #endif #ifdef ART_ENABLE_CODEGEN_x86 diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index 0a1f017828..397db251b8 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -22,11 +22,13 @@ #include "art_method-inl.h" #include "base/bit_utils.h" #include "base/casts.h" +#include "base/macros.h" #include "base/mem_map.h" #include "class_linker.h" #include "common_compiler_test.h" #include "compiler.h" #include "dex/dex_file.h" +#include "driver/compiler_options.h" #include "entrypoints/entrypoint_utils-inl.h" #include "gtest/gtest.h" #include "indirect_reference_table.h" @@ -43,7 +45,7 @@ #include "oat_quick_method_header.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" -#include "thread.h" +#include "thread-inl.h" extern "C" JNIEXPORT jint JNICALL Java_MyClassNatives_bar(JNIEnv*, jobject, jint count) { return count + 1; @@ -71,7 +73,7 @@ extern "C" JNIEXPORT jint JNICALL Java_MyClassNatives_sbar_1Critical(jint count) // TODO: In the Baker read barrier configuration, add checks to ensure // the Marking Register's value is correct. -namespace art { +namespace art HIDDEN { enum class JniKind { kNormal, // Regular kind of un-annotated natives. @@ -236,13 +238,14 @@ class JniCompilerTest : public CommonCompilerTest { bool direct, const char* method_name, const char* method_sig) { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<2> hs(soa.Self()); + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); + StackHandleScope<2> hs(self); Handle<mirror::ClassLoader> loader( hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader))); // Compile the native method before starting the runtime Handle<mirror::Class> c = - hs.NewHandle(class_linker_->FindClass(soa.Self(), "LMyClassNatives;", loader)); + hs.NewHandle(class_linker_->FindClass(self, "LMyClassNatives;", loader)); const auto pointer_size = class_linker_->GetImagePointerSize(); ArtMethod* method = c->FindClassMethod(method_name, method_sig, pointer_size); ASSERT_TRUE(method != nullptr) << method_name << " " << method_sig; @@ -251,8 +254,11 @@ class JniCompilerTest : public CommonCompilerTest { // Class initialization could replace the entrypoint, so force // the initialization before we set up the entrypoint below. 
class_linker_->EnsureInitialized( - soa.Self(), c, /*can_init_fields=*/ true, /*can_init_parents=*/ true); - class_linker_->MakeInitializedClassesVisiblyInitialized(soa.Self(), /*wait=*/ true); + self, c, /*can_init_fields=*/ true, /*can_init_parents=*/ true); + { + ScopedThreadSuspension sts(self, ThreadState::kNative); + class_linker_->MakeInitializedClassesVisiblyInitialized(self, /*wait=*/ true); + } } if (check_generic_jni_) { method->SetEntryPointFromQuickCompiledCode(class_linker_->GetRuntimeQuickGenericJniStub()); @@ -402,7 +408,7 @@ jobject JniCompilerTest::jobj_; jobject JniCompilerTest::class_loader_; void JniCompilerTest::AssertCallerObjectLocked(JNIEnv* env) { - Thread* self = down_cast<JNIEnvExt*>(env)->GetSelf(); + Thread* self = Thread::ForEnv(env); CHECK_EQ(self, Thread::Current()); ScopedObjectAccess soa(self); ArtMethod** caller_frame = self->GetManagedStack()->GetTopQuickFrame(); @@ -414,7 +420,7 @@ void JniCompilerTest::AssertCallerObjectLocked(JNIEnv* env) { CHECK(!caller->IsCriticalNative()); CHECK(caller->IsSynchronized()); ObjPtr<mirror::Object> lock; - if (self->GetManagedStack()->GetTopQuickFrameTag()) { + if (self->GetManagedStack()->GetTopQuickFrameGenericJniTag()) { // Generic JNI. lock = GetGenericJniSynchronizationObject(self, caller); } else if (caller->IsStatic()) { @@ -845,6 +851,7 @@ jlong Java_MyClassNatives_fooJJ_synchronized(JNIEnv* env, jobject, jlong x, jlon return x | y; } +EXPORT // Defined in `libart.so`. void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints, bool monitor_jni_entry_exit); @@ -1307,7 +1314,7 @@ void JniCompilerTest::ExceptionHandlingImpl() { CompileForTestWithCurrentJni(class_loader_, false, "synchronizedThrowException", "()V"); } } - // Start runtime to avoid re-initialization in SetupForTest. + // Start runtime to avoid re-initialization in SetUpForTest. Thread::Current()->TransitionFromSuspendedToRunnable(); bool started = runtime_->Start(); CHECK(started); @@ -1547,6 +1554,10 @@ jobject Java_MyClassNatives_staticMethodThatShouldReturnClass(JNIEnv* env, jclas } void JniCompilerTest::UpcallReturnTypeChecking_InstanceImpl() { + // Set debuggable so that the JNI compiler does not emit a fast-path that would skip the + // runtime call where we do these checks. Note that while normal gtests use the debug build + // which disables the fast path, `art_standalone_compiler_tests` run in the release build. + compiler_options_->SetDebuggable(true); SetUpForTest(false, "instanceMethodThatShouldReturnClass", "()Ljava/lang/Class;", CURRENT_JNI_WRAPPER(Java_MyClassNatives_instanceMethodThatShouldReturnClass)); @@ -1574,6 +1585,10 @@ void JniCompilerTest::UpcallReturnTypeChecking_InstanceImpl() { JNI_TEST(UpcallReturnTypeChecking_Instance) void JniCompilerTest::UpcallReturnTypeChecking_StaticImpl() { + // Set debuggable so that the JNI compiler does not emit a fast-path that would skip the + // runtime call where we do these checks. Note that while normal gtests use the debug build + // which disables the fast path, `art_standalone_compiler_tests` run in the release build. 
+ compiler_options_->SetDebuggable(true); SetUpForTest(true, "staticMethodThatShouldReturnClass", "()Ljava/lang/Class;", CURRENT_JNI_WRAPPER(Java_MyClassNatives_staticMethodThatShouldReturnClass)); diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc index c1afdb8238..d81ca77b74 100644 --- a/compiler/jni/quick/arm/calling_convention_arm.cc +++ b/compiler/jni/quick/arm/calling_convention_arm.cc @@ -23,7 +23,7 @@ #include "base/macros.h" #include "utils/arm/managed_register_arm.h" -namespace art { +namespace art HIDDEN { namespace arm { // @@ -199,6 +199,10 @@ ManagedRegister ArmManagedRuntimeCallingConvention::MethodRegister() { return ArmManagedRegister::FromCoreRegister(R0); } +ManagedRegister ArmManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() { + return ArmManagedRegister::FromCoreRegister(R2); +} + void ArmManagedRuntimeCallingConvention::ResetIterator(FrameOffset displacement) { ManagedRuntimeCallingConvention::ResetIterator(displacement); gpr_index_ = 1u; // Skip r0 for ArtMethod* diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h index 4526d9e759..3a09d4eaad 100644 --- a/compiler/jni/quick/arm/calling_convention_arm.h +++ b/compiler/jni/quick/arm/calling_convention_arm.h @@ -18,9 +18,10 @@ #define ART_COMPILER_JNI_QUICK_ARM_CALLING_CONVENTION_ARM_H_ #include "base/enums.h" +#include "base/macros.h" #include "jni/quick/calling_convention.h" -namespace art { +namespace art HIDDEN { namespace arm { class ArmManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention { @@ -39,6 +40,7 @@ class ArmManagedRuntimeCallingConvention final : public ManagedRuntimeCallingCon void ResetIterator(FrameOffset displacement) override; // Managed runtime calling convention ManagedRegister MethodRegister() override; + ManagedRegister ArgumentRegisterForMethodExitHook() override; void Next() override; bool IsCurrentParamInRegister() override; bool IsCurrentParamOnStack() override; diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc index ec77db3dcb..e716502911 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.cc +++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc @@ -22,7 +22,7 @@ #include "arch/instruction_set.h" #include "utils/arm64/managed_register_arm64.h" -namespace art { +namespace art HIDDEN { namespace arm64 { static constexpr ManagedRegister kXArgumentRegisters[] = { @@ -174,6 +174,10 @@ ManagedRegister Arm64ManagedRuntimeCallingConvention::MethodRegister() { return Arm64ManagedRegister::FromXRegister(X0); } +ManagedRegister Arm64ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() { + return Arm64ManagedRegister::FromXRegister(X4); +} + bool Arm64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() { if (IsCurrentParamAFloatOrDouble()) { return itr_float_and_doubles_ < kMaxFloatOrDoubleRegisterArguments; diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h index 176271e3dc..f29eb15fa8 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.h +++ b/compiler/jni/quick/arm64/calling_convention_arm64.h @@ -18,9 +18,10 @@ #define ART_COMPILER_JNI_QUICK_ARM64_CALLING_CONVENTION_ARM64_H_ #include "base/enums.h" +#include "base/macros.h" #include "jni/quick/calling_convention.h" -namespace art { +namespace art HIDDEN { namespace arm64 { class 
Arm64ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention { @@ -35,6 +36,7 @@ class Arm64ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingC ManagedRegister ReturnRegister() const override; // Managed runtime calling convention ManagedRegister MethodRegister() override; + ManagedRegister ArgumentRegisterForMethodExitHook() override; bool IsCurrentParamInRegister() override; bool IsCurrentParamOnStack() override; ManagedRegister CurrentParamRegister() override; diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc index eb4d3724ee..2b9da6ba1a 100644 --- a/compiler/jni/quick/calling_convention.cc +++ b/compiler/jni/quick/calling_convention.cc @@ -37,7 +37,7 @@ #include "jni/quick/x86_64/calling_convention_x86_64.h" #endif -namespace art { +namespace art HIDDEN { // Managed runtime calling convention @@ -74,6 +74,10 @@ std::unique_ptr<ManagedRuntimeCallingConvention> ManagedRuntimeCallingConvention is_static, is_synchronized, shorty)); #endif default: + UNUSED(allocator); + UNUSED(is_static); + UNUSED(is_synchronized); + UNUSED(shorty); LOG(FATAL) << "Unknown InstructionSet: " << instruction_set; UNREACHABLE(); } @@ -165,6 +169,12 @@ std::unique_ptr<JniCallingConvention> JniCallingConvention::Create(ArenaAllocato is_static, is_synchronized, is_fast_native, is_critical_native, shorty)); #endif default: + UNUSED(allocator); + UNUSED(is_static); + UNUSED(is_synchronized); + UNUSED(is_fast_native); + UNUSED(is_critical_native); + UNUSED(shorty); LOG(FATAL) << "Unknown InstructionSet: " << instruction_set; UNREACHABLE(); } diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h index e2f3bfb78c..0187b14256 100644 --- a/compiler/jni/quick/calling_convention.h +++ b/compiler/jni/quick/calling_convention.h @@ -20,11 +20,12 @@ #include "base/arena_object.h" #include "base/array_ref.h" #include "base/enums.h" +#include "base/macros.h" #include "dex/primitive.h" #include "thread.h" #include "utils/managed_register.h" -namespace art { +namespace art HIDDEN { enum class InstructionSet; @@ -244,6 +245,11 @@ class ManagedRuntimeCallingConvention : public CallingConvention { // Register that holds the incoming method argument virtual ManagedRegister MethodRegister() = 0; + // Register that is used to pass frame size for method exit hook call. This + // shouldn't be the same as the return register since method exit hook also expects + // return values in the return register. 
+ virtual ManagedRegister ArgumentRegisterForMethodExitHook() = 0; + // Iterator interface bool HasNext(); virtual void Next(); diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index 6cb50211e1..c60d97467e 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -36,7 +36,9 @@ #include "dex/dex_file-inl.h" #include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "instrumentation.h" #include "jni/jni_env_ext.h" +#include "runtime.h" #include "thread.h" #include "utils/arm/managed_register_arm.h" #include "utils/arm64/managed_register_arm64.h" @@ -47,7 +49,7 @@ #define __ jni_asm-> -namespace art { +namespace art HIDDEN { constexpr size_t kIRTCookieSize = JniCallingConvention::SavedLocalReferenceCookieSize(); @@ -68,6 +70,12 @@ static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm, ManagedRegister in_reg); template <PointerSize kPointerSize> +static void CallDecodeReferenceResult(JNIMacroAssembler<kPointerSize>* jni_asm, + JniCallingConvention* jni_conv, + ManagedRegister mr_return_reg, + size_t main_out_arg_size); + +template <PointerSize kPointerSize> static std::unique_ptr<JNIMacroAssembler<kPointerSize>> GetMacroAssembler( ArenaAllocator* allocator, InstructionSet isa, const InstructionSetFeatures* features) { return JNIMacroAssembler<kPointerSize>::Create(allocator, isa, features); @@ -101,6 +109,24 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp // i.e. if the method was annotated with @CriticalNative const bool is_critical_native = (access_flags & kAccCriticalNative) != 0u; + bool is_debuggable = compiler_options.GetDebuggable(); + bool needs_entry_exit_hooks = is_debuggable && compiler_options.IsJitCompiler(); + // We don't support JITing stubs for critical native methods in debuggable runtimes yet. + // TODO(mythria): Add support required for calling method entry / exit hooks from critical native + // methods. + DCHECK_IMPLIES(needs_entry_exit_hooks, !is_critical_native); + + // The fast-path for decoding a reference skips CheckJNI checks, so we do not inline the + // decoding in debug build or for debuggable apps (both cases enable CheckJNI by default). + bool inline_decode_reference = !kIsDebugBuild && !is_debuggable; + + // When walking the stack the top frame doesn't have a pc associated with it. We then depend on + // the invariant that we don't have JITed code when AOT code is available. In debuggable runtimes + // this invariant doesn't hold. So we tag the SP for JITed code to indentify if we are executing + // JITed code or AOT code. Since tagging involves additional instructions we tag only in + // debuggable runtimes. + bool should_tag_sp = needs_entry_exit_hooks; + VLOG(jni) << "JniCompile: Method :: " << dex_file.PrettyMethod(method_idx, /* with signature */ true) << " :: access_flags = " << std::hex << access_flags << std::dec; @@ -182,7 +208,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp // Skip this for @CriticalNative because we're not passing a `jclass` to the native method. 
std::unique_ptr<JNIMacroLabel> jclass_read_barrier_slow_path; std::unique_ptr<JNIMacroLabel> jclass_read_barrier_return; - if (kUseReadBarrier && is_static && LIKELY(!is_critical_native)) { + if (gUseReadBarrier && is_static && LIKELY(!is_critical_native)) { jclass_read_barrier_slow_path = __ CreateLabel(); jclass_read_barrier_return = __ CreateLabel(); @@ -219,7 +245,22 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp // because garbage collections are disabled within the execution of a // @CriticalNative method. if (LIKELY(!is_critical_native)) { - __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>()); + __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>(), should_tag_sp); + } + + // 1.5. Call any method entry hooks if required. + // For critical native methods, we don't JIT stubs in debuggable runtimes (see + // OptimizingCompiler::JitCompile). + // TODO(mythria): Add support to call method entry / exit hooks for critical native methods too. + std::unique_ptr<JNIMacroLabel> method_entry_hook_slow_path; + std::unique_ptr<JNIMacroLabel> method_entry_hook_return; + if (UNLIKELY(needs_entry_exit_hooks)) { + uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()); + int offset = instrumentation::Instrumentation::HaveMethodEntryListenersOffset().Int32Value(); + method_entry_hook_slow_path = __ CreateLabel(); + method_entry_hook_return = __ CreateLabel(); + __ TestByteAndJumpIfNotZero(address + offset, method_entry_hook_slow_path.get()); + __ Bind(method_entry_hook_return.get()); } // 2. Lock the object (if synchronized) and transition out of Runnable (if normal native). @@ -442,8 +483,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp __ Bind(transition_to_runnable_resume.get()); } - // 5.2. For methods that return a reference, do an early exception check so that the - // `JniDecodeReferenceResult()` in the main path does not need to check for exceptions. + // 5.2. For methods that return a reference, do an exception check before decoding the reference. std::unique_ptr<JNIMacroLabel> exception_slow_path = LIKELY(!is_critical_native) ? __ CreateLabel() : nullptr; if (reference_return) { @@ -462,23 +502,23 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp __ Bind(suspend_check_resume.get()); } - // 5.4 For methods with reference return, decode the `jobject` with `JniDecodeReferenceResult()`. + // 5.4 For methods with reference return, decode the `jobject`, either directly + // or with a call to `JniDecodeReferenceResult()`. + std::unique_ptr<JNIMacroLabel> decode_reference_slow_path; + std::unique_ptr<JNIMacroLabel> decode_reference_resume; if (reference_return) { DCHECK(!is_critical_native); - // We abuse the JNI calling convention here, that is guaranteed to support passing - // two pointer arguments, `JNIEnv*` and `jclass`/`jobject`. - main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size)); - ThreadOffset<kPointerSize> jni_decode_reference_result = - QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniDecodeReferenceResult); - // Pass result. 
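The method-entry hook check added above (`TestByteAndJumpIfNotZero(address + offset, ...)`) bakes the absolute address of a flag byte inside the long-lived `Instrumentation` object into the stub and branches to the slow path only when that byte is non-zero. A standalone analogue of what the emitted fast path computes; the struct below is a stand-in, not ART's real `Instrumentation` layout:

#include <cstddef>
#include <cstdint>

// Stand-in for the runtime object whose field offset the stub bakes in.
struct FakeInstrumentation {
  uint8_t have_method_entry_listeners;  // plays the role of the byte tested above
};

bool NeedsEntryHookSlowPath(const FakeInstrumentation* instr) {
  // The JNI stub does this with an immediate address: the Instrumentation*
  // plus HaveMethodEntryListenersOffset(), followed by a single byte test.
  uintptr_t address = reinterpret_cast<uintptr_t>(instr);
  size_t offset = offsetof(FakeInstrumentation, have_method_entry_listeners);
  return *reinterpret_cast<const uint8_t*>(address + offset) != 0u;
}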
- SetNativeParameter(jni_asm.get(), main_jni_conv.get(), mr_conv->ReturnRegister()); - main_jni_conv->Next(); - if (main_jni_conv->IsCurrentParamInRegister()) { - __ GetCurrentThread(main_jni_conv->CurrentParamRegister()); - __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_decode_reference_result)); + if (inline_decode_reference) { + // Decode local and JNI transition references in the main path. + decode_reference_slow_path = __ CreateLabel(); + decode_reference_resume = __ CreateLabel(); + __ DecodeJNITransitionOrLocalJObject(mr_conv->ReturnRegister(), + decode_reference_slow_path.get(), + decode_reference_resume.get()); + __ Bind(decode_reference_resume.get()); } else { - __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset()); - __ CallFromThread(jni_decode_reference_result); + CallDecodeReferenceResult<kPointerSize>( + jni_asm.get(), main_jni_conv.get(), mr_conv->ReturnRegister(), main_out_arg_size); } } // if (!is_critical_native) @@ -532,7 +572,21 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp __ Bind(suspend_check_resume.get()); } - // 7.5. Remove activation - need to restore callee save registers since the GC + // 7.5. Check if method exit hooks needs to be called + // For critical native methods, we don't JIT stubs in debuggable runtimes. + // TODO(mythria): Add support to call method entry / exit hooks for critical native methods too. + std::unique_ptr<JNIMacroLabel> method_exit_hook_slow_path; + std::unique_ptr<JNIMacroLabel> method_exit_hook_return; + if (UNLIKELY(needs_entry_exit_hooks)) { + uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()); + int offset = instrumentation::Instrumentation::RunExitHooksOffset().Int32Value(); + method_exit_hook_slow_path = __ CreateLabel(); + method_exit_hook_return = __ CreateLabel(); + __ TestByteAndJumpIfNotZero(address + offset, method_exit_hook_slow_path.get()); + __ Bind(method_exit_hook_return.get()); + } + + // 7.6. Remove activation - need to restore callee save registers since the GC // may have changed them. DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size)); if (LIKELY(!is_critical_native) || !main_jni_conv->UseTailCall()) { @@ -547,7 +601,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp // 8.1. Read barrier slow path for the declaring class in the method for a static call. // Skip this for @CriticalNative because we're not passing a `jclass` to the native method. - if (kUseReadBarrier && is_static && !is_critical_native) { + if (gUseReadBarrier && is_static && !is_critical_native) { __ Bind(jclass_read_barrier_slow_path.get()); // Construct slow path for read barrier: @@ -594,7 +648,37 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp __ Jump(transition_to_runnable_resume.get()); } - // 8.4. Suspend check slow path. + // 8.4. Exception poll slow path(s). + if (LIKELY(!is_critical_native)) { + __ Bind(exception_slow_path.get()); + if (reference_return) { + // We performed the exception check early, so we need to adjust SP and pop IRT frame. 
+ if (main_out_arg_size != 0) { + jni_asm->cfi().AdjustCFAOffset(main_out_arg_size); + __ DecreaseFrameSize(main_out_arg_size); + } + PopLocalReferenceFrame<kPointerSize>( + jni_asm.get(), jni_env_reg, saved_cookie_reg, callee_save_temp); + } + DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size)); + __ DeliverPendingException(); + } + + // 8.5 Slow path for decoding the `jobject`. + if (reference_return && inline_decode_reference) { + __ Bind(decode_reference_slow_path.get()); + if (main_out_arg_size != 0) { + jni_asm->cfi().AdjustCFAOffset(main_out_arg_size); + } + CallDecodeReferenceResult<kPointerSize>( + jni_asm.get(), main_jni_conv.get(), mr_conv->ReturnRegister(), main_out_arg_size); + __ Jump(decode_reference_resume.get()); + if (main_out_arg_size != 0) { + jni_asm->cfi().AdjustCFAOffset(-main_out_arg_size); + } + } + + // 8.6. Suspend check slow path. if (UNLIKELY(is_fast_native)) { __ Bind(suspend_check_slow_path.get()); if (reference_return && main_out_arg_size != 0) { @@ -605,29 +689,34 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp if (reference_return) { // Suspend check entry point overwrites top of managed stack and leaves it clobbered. // We need to restore the top for subsequent runtime call to `JniDecodeReferenceResult()`. - __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>()); + __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>(), should_tag_sp); } if (reference_return && main_out_arg_size != 0) { __ IncreaseFrameSize(main_out_arg_size); - jni_asm->cfi().AdjustCFAOffset(-main_out_arg_size); } __ Jump(suspend_check_resume.get()); + if (reference_return && main_out_arg_size != 0) { + jni_asm->cfi().AdjustCFAOffset(-main_out_arg_size); + } } - // 8.5. Exception poll slow path(s). - if (LIKELY(!is_critical_native)) { - __ Bind(exception_slow_path.get()); - if (reference_return) { - // We performed the exception check early, so we need to adjust SP and pop IRT frame. - if (main_out_arg_size != 0) { - jni_asm->cfi().AdjustCFAOffset(main_out_arg_size); - __ DecreaseFrameSize(main_out_arg_size); - } - PopLocalReferenceFrame<kPointerSize>( - jni_asm.get(), jni_env_reg, saved_cookie_reg, callee_save_temp); - } - DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size)); - __ DeliverPendingException(); + // 8.7. Method entry / exit hooks slow paths. + if (UNLIKELY(needs_entry_exit_hooks)) { + __ Bind(method_entry_hook_slow_path.get()); + // Use Jni specific method entry hook that saves all the arguments. We have only saved the + // callee save registers at this point. So go through Jni specific stub that saves the rest + // of the live registers. + __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEntryHook)); + __ ExceptionPoll(exception_slow_path.get()); + __ Jump(method_entry_hook_return.get()); + + __ Bind(method_exit_hook_slow_path.get()); + // Method exit hooks is called just before tearing down the frame. So there are no live + // registers and we can directly call the method exit hook and don't need a Jni specific + // entrypoint. + __ Move(mr_conv->ArgumentRegisterForMethodExitHook(), managed_frame_size); + __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pMethodExitHook)); + __ Jump(method_exit_hook_return.get()); } // 9. Finalize code generation. 
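The exit-hook slow path above passes the managed frame size in `ArgumentRegisterForMethodExitHook()` because the return registers are still holding the method's result at that point. A small sketch collecting the registers chosen by the per-architecture calling-convention hunks elsewhere in this change; the helper itself is illustrative only and assumes `arch/instruction_set.h`:

// Sketch: registers picked by ArgumentRegisterForMethodExitHook() in this change.
// Each avoids aliasing the managed return register, since the return value is
// live when the exit hook runs.
const char* MethodExitHookArgRegisterName(InstructionSet isa) {
  switch (isa) {
    case InstructionSet::kArm:
    case InstructionSet::kThumb2: return "R2";
    case InstructionSet::kArm64:  return "X4";
    case InstructionSet::kX86:    return "EBX";
    case InstructionSet::kX86_64: return "R8";
    default:                      return "not implemented in this change";
  }
}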
@@ -693,6 +782,31 @@ static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm, } } +template <PointerSize kPointerSize> +static void CallDecodeReferenceResult(JNIMacroAssembler<kPointerSize>* jni_asm, + JniCallingConvention* jni_conv, + ManagedRegister mr_return_reg, + size_t main_out_arg_size) { + // We abuse the JNI calling convention here, that is guaranteed to support passing + // two pointer arguments, `JNIEnv*` and `jclass`/`jobject`. + jni_conv->ResetIterator(FrameOffset(main_out_arg_size)); + ThreadOffset<kPointerSize> jni_decode_reference_result = + QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniDecodeReferenceResult); + // Pass result. + SetNativeParameter(jni_asm, jni_conv, mr_return_reg); + jni_conv->Next(); + if (jni_conv->IsCurrentParamInRegister()) { + __ GetCurrentThread(jni_conv->CurrentParamRegister()); + __ Call(jni_conv->CurrentParamRegister(), Offset(jni_decode_reference_result)); + } else { + __ GetCurrentThread(jni_conv->CurrentParamStackOffset()); + __ CallFromThread(jni_decode_reference_result); + } + // Note: If the native ABI returns the pointer in a register different from + // `mr_return_register`, the `JniDecodeReferenceResult` entrypoint must be + // a stub that moves the result to `mr_return_register`. +} + JniCompiledMethod ArtQuickJniCompileMethod(const CompilerOptions& compiler_options, uint32_t access_flags, uint32_t method_idx, diff --git a/compiler/jni/quick/jni_compiler.h b/compiler/jni/quick/jni_compiler.h index 52a6f3cf02..d43b2a9917 100644 --- a/compiler/jni/quick/jni_compiler.h +++ b/compiler/jni/quick/jni_compiler.h @@ -21,8 +21,9 @@ #include "arch/instruction_set.h" #include "base/array_ref.h" +#include "base/macros.h" -namespace art { +namespace art HIDDEN { class ArenaAllocator; class ArtMethod; diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc index 65be92cdce..598e8e72ff 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.cc +++ b/compiler/jni/quick/x86/calling_convention_x86.cc @@ -22,7 +22,7 @@ #include "arch/x86/jni_frame_x86.h" #include "utils/x86/managed_register_x86.h" -namespace art { +namespace art HIDDEN { namespace x86 { static constexpr ManagedRegister kManagedCoreArgumentRegisters[] = { @@ -143,6 +143,10 @@ ManagedRegister X86ManagedRuntimeCallingConvention::MethodRegister() { return X86ManagedRegister::FromCpuRegister(EAX); } +ManagedRegister X86ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() { + return X86ManagedRegister::FromCpuRegister(EBX); +} + void X86ManagedRuntimeCallingConvention::ResetIterator(FrameOffset displacement) { ManagedRuntimeCallingConvention::ResetIterator(displacement); gpr_arg_count_ = 1u; // Skip EAX for ArtMethod* diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h index cd7ef5b557..f0d663dd98 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.h +++ b/compiler/jni/quick/x86/calling_convention_x86.h @@ -18,9 +18,10 @@ #define ART_COMPILER_JNI_QUICK_X86_CALLING_CONVENTION_X86_H_ #include "base/enums.h" +#include "base/macros.h" #include "jni/quick/calling_convention.h" -namespace art { +namespace art HIDDEN { namespace x86 { class X86ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention { @@ -37,6 +38,7 @@ class X86ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingCon void ResetIterator(FrameOffset displacement) override; // Managed runtime calling convention ManagedRegister MethodRegister() 
override; + ManagedRegister ArgumentRegisterForMethodExitHook() override; void Next() override; bool IsCurrentParamInRegister() override; bool IsCurrentParamOnStack() override; diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc index 862ee5e2be..9d0761d2f7 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc @@ -23,7 +23,7 @@ #include "base/bit_utils.h" #include "utils/x86_64/managed_register_x86_64.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { static constexpr ManagedRegister kCoreArgumentRegisters[] = { @@ -147,6 +147,10 @@ ManagedRegister X86_64ManagedRuntimeCallingConvention::MethodRegister() { return X86_64ManagedRegister::FromCpuRegister(RDI); } +ManagedRegister X86_64ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() { + return X86_64ManagedRegister::FromCpuRegister(R8); +} + bool X86_64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() { if (IsCurrentParamAFloatOrDouble()) { return itr_float_and_doubles_ < kMaxFloatOrDoubleRegisterArguments; diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h index 483f1f5806..859a277c60 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h @@ -18,9 +18,10 @@ #define ART_COMPILER_JNI_QUICK_X86_64_CALLING_CONVENTION_X86_64_H_ #include "base/enums.h" +#include "base/macros.h" #include "jni/quick/calling_convention.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { class X86_64ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention { @@ -35,6 +36,7 @@ class X86_64ManagedRuntimeCallingConvention final : public ManagedRuntimeCalling ManagedRegister ReturnRegister() const override; // Managed runtime calling convention ManagedRegister MethodRegister() override; + ManagedRegister ArgumentRegisterForMethodExitHook() override; bool IsCurrentParamInRegister() override; bool IsCurrentParamOnStack() override; ManagedRegister CurrentParamRegister() override; diff --git a/compiler/libart-compiler.map b/compiler/libart-compiler.map new file mode 100644 index 0000000000..f66052a329 --- /dev/null +++ b/compiler/libart-compiler.map @@ -0,0 +1,34 @@ +# +# Copyright (C) 2022 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +ART_COMPILER { + global: + extern "C++" { + art::debug::MakeMiniDebugInfo*; + *art::debug::WriteDebugInfo*; + art::Compiler::Create*; + art::CompilerOptions::*; + art::CreateTrampoline*; + art::IntrinsicObjects::*; + art::linker::operator*art::linker::LinkerPatch::Type*; + art::operator*art::Whence*; + }; + + jit_load; + + local: + *; +}; diff --git a/compiler/linker/linker_patch.h b/compiler/linker/linker_patch.h index 7da1e82d91..8ed7fce0ff 100644 --- a/compiler/linker/linker_patch.h +++ b/compiler/linker/linker_patch.h @@ -23,9 +23,10 @@ #include <android-base/logging.h> #include "base/bit_utils.h" +#include "base/macros.h" #include "dex/method_reference.h" -namespace art { +namespace art HIDDEN { class DexFile; @@ -328,7 +329,7 @@ class LinkerPatch { friend bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs); friend bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs); }; -std::ostream& operator<<(std::ostream& os, LinkerPatch::Type type); +EXPORT std::ostream& operator<<(std::ostream& os, LinkerPatch::Type type); inline bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs) { return lhs.literal_offset_ == rhs.literal_offset_ && diff --git a/compiler/linker/linker_patch_test.cc b/compiler/linker/linker_patch_test.cc index 997418c4f7..1c46da15e1 100644 --- a/compiler/linker/linker_patch_test.cc +++ b/compiler/linker/linker_patch_test.cc @@ -16,9 +16,10 @@ #include <gtest/gtest.h> +#include "base/macros.h" #include "linker_patch.h" -namespace art { +namespace art HIDDEN { namespace linker { TEST(LinkerPatch, LinkerPatchOperators) { diff --git a/compiler/linker/output_stream_test.cc b/compiler/linker/output_stream_test.cc index f1af4cb8b7..22b174fce6 100644 --- a/compiler/linker/output_stream_test.cc +++ b/compiler/linker/output_stream_test.cc @@ -16,17 +16,17 @@ #include <android-base/logging.h> +#include "base/common_art_test.h" #include "base/macros.h" #include "base/unix_file/fd_file.h" -#include "common_runtime_test.h" #include "stream/buffered_output_stream.h" #include "stream/file_output_stream.h" #include "stream/vector_output_stream.h" -namespace art { +namespace art HIDDEN { namespace linker { -class OutputStreamTest : public CommonRuntimeTest { +class OutputStreamTest : public CommonArtTest { protected: void CheckOffset(off_t expected) { off_t actual = output_stream_->Seek(0, kSeekCurrent); diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc index e1f061ae70..703584c537 100644 --- a/compiler/optimizing/block_builder.cc +++ b/compiler/optimizing/block_builder.cc @@ -22,7 +22,7 @@ #include "dex/dex_file_exception_helpers.h" #include "quicken_info.h" -namespace art { +namespace art HIDDEN { HBasicBlockBuilder::HBasicBlockBuilder(HGraph* graph, const DexFile* const dex_file, diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h index 42a3f327e7..8668ef8221 100644 --- a/compiler/optimizing/block_builder.h +++ b/compiler/optimizing/block_builder.h @@ -17,13 +17,14 @@ #ifndef ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_ #define ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_ +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "dex/code_item_accessors.h" #include "dex/dex_file.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class HBasicBlockBuilder : public ValueObject { public: diff --git a/compiler/optimizing/block_namer.cc b/compiler/optimizing/block_namer.cc index d30448cd23..029e26b2be 100644 --- 
a/compiler/optimizing/block_namer.cc +++ b/compiler/optimizing/block_namer.cc @@ -18,7 +18,7 @@ #include "nodes.h" -namespace art { +namespace art HIDDEN { std::ostream& BlockNamer::PrintName(std::ostream& os, HBasicBlock* blk) const { os << "B"; diff --git a/compiler/optimizing/block_namer.h b/compiler/optimizing/block_namer.h index ed396b9bf8..39c5973297 100644 --- a/compiler/optimizing/block_namer.h +++ b/compiler/optimizing/block_namer.h @@ -19,7 +19,9 @@ #include <ostream> -namespace art { +#include "base/macros.h" + +namespace art HIDDEN { class HBasicBlock; struct BlockNamer { diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index dad3c818fa..919abfdc49 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -24,7 +24,7 @@ #include "nodes.h" #include "side_effects_analysis.h" -namespace art { +namespace art HIDDEN { class MonotonicValueRange; @@ -490,7 +490,7 @@ class MonotonicValueRange : public ValueRange { DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange); }; -class BCEVisitor : public HGraphVisitor { +class BCEVisitor final : public HGraphVisitor { public: // The least number of bounds checks that should be eliminated by triggering // the deoptimization technique. @@ -564,6 +564,19 @@ class BCEVisitor : public HGraphVisitor { early_exit_loop_.clear(); taken_test_loop_.clear(); finite_loop_.clear(); + + // We may have eliminated all bounds checks so we should update the flag. + // TODO(solanes): Do this without a linear pass of the graph? + GetGraph()->SetHasBoundsChecks(false); + for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsBoundsCheck()) { + GetGraph()->SetHasBoundsChecks(true); + return; + } + } + } } private: @@ -1818,6 +1831,7 @@ class BCEVisitor : public HGraphVisitor { HInstruction* condition, bool is_null_check = false) { HInstruction* suspend = loop->GetSuspendCheck(); + DCHECK(suspend != nullptr); block->InsertInstructionBefore(condition, block->GetLastInstruction()); DeoptimizationKind kind = is_null_check ? 
DeoptimizationKind::kLoopNullBCE : DeoptimizationKind::kLoopBoundsBCE; @@ -1997,7 +2011,7 @@ class BCEVisitor : public HGraphVisitor { phi->SetRawInputAt(0, instruction); phi->SetRawInputAt(1, zero); if (type == DataType::Type::kReference) { - phi->SetReferenceTypeInfo(instruction->GetReferenceTypeInfo()); + phi->SetReferenceTypeInfoIfValid(instruction->GetReferenceTypeInfo()); } new_preheader->AddPhi(phi); return phi; diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h index ef08877daa..f210fa9127 100644 --- a/compiler/optimizing/bounds_check_elimination.h +++ b/compiler/optimizing/bounds_check_elimination.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_BOUNDS_CHECK_ELIMINATION_H_ #define ART_COMPILER_OPTIMIZING_BOUNDS_CHECK_ELIMINATION_H_ +#include "base/macros.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class SideEffectsAnalysis; class HInductionVarAnalysis; diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 5927d681b2..929a9e7fe7 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -17,6 +17,7 @@ #include "bounds_check_elimination.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "gvn.h" #include "induction_var_analysis.h" @@ -27,7 +28,7 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { /** * Fixture class for the BoundsCheckElimination tests. diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index e7826bbba3..48d1a9da2f 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -33,7 +33,7 @@ #include "ssa_builder.h" #include "thread.h" -namespace art { +namespace art HIDDEN { HGraphBuilder::HGraphBuilder(HGraph* graph, const CodeItemDebugInfoAccessor& accessor, @@ -103,7 +103,6 @@ GraphAnalysisResult HGraphBuilder::BuildGraph() { graph_->SetNumberOfVRegs(code_item_accessor_.RegistersSize()); graph_->SetNumberOfInVRegs(code_item_accessor_.InsSize()); graph_->SetMaximumNumberOfOutVRegs(code_item_accessor_.OutsSize()); - graph_->SetHasTryCatch(code_item_accessor_.TriesSize() != 0); // Use ScopedArenaAllocator for all local allocations. ScopedArenaAllocator local_allocator(graph_->GetArenaStack()); @@ -168,7 +167,6 @@ void HGraphBuilder::BuildIntrinsicGraph(ArtMethod* method) { graph_->SetNumberOfVRegs(return_vregs + num_arg_vregs); graph_->SetNumberOfInVRegs(num_arg_vregs); graph_->SetMaximumNumberOfOutVRegs(num_arg_vregs); - graph_->SetHasTryCatch(false); // Use ScopedArenaAllocator for all local allocations. 
ScopedArenaAllocator local_allocator(graph_->GetArenaStack()); diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 580769e0f9..ef225d9a6a 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -19,12 +19,13 @@ #include "base/arena_object.h" #include "base/array_ref.h" +#include "base/macros.h" #include "dex/code_item_accessors.h" #include "dex/dex_file-inl.h" #include "dex/dex_file.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class ArtMethod; class CodeGenerator; diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc index c6232ef661..20a763cf6d 100644 --- a/compiler/optimizing/cha_guard_optimization.cc +++ b/compiler/optimizing/cha_guard_optimization.cc @@ -16,7 +16,7 @@ #include "cha_guard_optimization.h" -namespace art { +namespace art HIDDEN { // Note we can only do CHA guard elimination/motion in a single pass, since // if a guard is not removed, another guard might be removed due to @@ -200,6 +200,7 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag, block->RemoveInstruction(deopt); HInstruction* suspend = loop_info->GetSuspendCheck(); + DCHECK(suspend != nullptr); // Need a new deoptimize instruction that copies the environment // of the suspend instruction for the loop. HDeoptimize* deoptimize = new (GetGraph()->GetAllocator()) HDeoptimize( diff --git a/compiler/optimizing/cha_guard_optimization.h b/compiler/optimizing/cha_guard_optimization.h index 440d51a969..5c1fdd90de 100644 --- a/compiler/optimizing/cha_guard_optimization.h +++ b/compiler/optimizing/cha_guard_optimization.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_ #define ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_ +#include "base/macros.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { /** * Optimize CHA guards by removing/moving them. diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 27eabafb8f..c9f42b52f5 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -15,6 +15,7 @@ */ #include "code_generator.h" +#include "base/globals.h" #ifdef ART_ENABLE_CODEGEN_arm #include "code_generator_arm_vixl.h" @@ -24,6 +25,10 @@ #include "code_generator_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 +#include "code_generator_riscv64.h" +#endif + #ifdef ART_ENABLE_CODEGEN_x86 #include "code_generator_x86.h" #endif @@ -39,7 +44,6 @@ #include "base/leb128.h" #include "class_linker.h" #include "class_root-inl.h" -#include "compiled_method.h" #include "dex/bytecode_utils.h" #include "dex/code_item_accessors-inl.h" #include "graph_visualizer.h" @@ -61,7 +65,7 @@ #include "thread-current-inl.h" #include "utils/assembler.h" -namespace art { +namespace art HIDDEN { // Return whether a location is consistent with a type. 
static bool CheckType(DataType::Type type, Location location) { @@ -389,7 +393,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { core_spill_mask_, fpu_spill_mask_, GetGraph()->GetNumberOfVRegs(), - GetGraph()->IsCompilingBaseline()); + GetGraph()->IsCompilingBaseline(), + GetGraph()->IsDebuggable()); size_t frame_start = GetAssembler()->CodeSize(); GenerateFrameEntry(); @@ -412,7 +417,13 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); if (current->HasEnvironment()) { - // Create stackmap for HNativeDebugInfo or any instruction which calls native code. + // Catch StackMaps are dealt with later on in `RecordCatchBlockInfo`. + if (block->IsCatchBlock() && block->GetFirstInstruction() == current) { + DCHECK(current->IsNop()); + continue; + } + + // Create stackmap for HNop or any instruction which calls native code. // Note that we need correct mapping for the native PC of the call instruction, // so the runtime's stackmap is not sufficient since it is at PC after the call. MaybeRecordNativeDebugInfo(current, block->GetDexPc()); @@ -1030,6 +1041,9 @@ std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph, } #endif default: + UNUSED(allocator); + UNUSED(graph); + UNUSED(stats); return nullptr; } } @@ -1041,7 +1055,8 @@ CodeGenerator::CodeGenerator(HGraph* graph, uint32_t core_callee_save_mask, uint32_t fpu_callee_save_mask, const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats) + OptimizingCompilerStats* stats, + const art::ArrayRef<const bool>& unimplemented_intrinsics) : frame_size_(0), core_spill_mask_(0), fpu_spill_mask_(0), @@ -1066,7 +1081,8 @@ CodeGenerator::CodeGenerator(HGraph* graph, is_leaf_(true), needs_suspend_check_entry_(false), requires_current_method_(false), - code_generation_data_() { + code_generation_data_(), + unimplemented_intrinsics_(unimplemented_intrinsics) { if (GetGraph()->IsCompilingOsr()) { // Make OSR methods have all registers spilled, this simplifies the logic of // jumping to the compiled code directly. @@ -1123,7 +1139,7 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, for (HBasicBlock* block : graph.GetReversePostOrder()) { if (block->IsLoopHeader()) { HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); - if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) { + if (suspend_check != nullptr && !suspend_check->GetEnvironment()->IsFromInlinedInvoke()) { loop_headers.push_back(suspend_check); } } @@ -1333,53 +1349,43 @@ void CodeGenerator::RecordCatchBlockInfo() { continue; } - uint32_t dex_pc = block->GetDexPc(); - uint32_t num_vregs = graph_->GetNumberOfVRegs(); - uint32_t native_pc = GetAddressOf(block); + // Get the outer dex_pc. We save the full environment list for DCHECK purposes in kIsDebugBuild. 
+ std::vector<uint32_t> dex_pc_list_for_verification; + if (kIsDebugBuild) { + dex_pc_list_for_verification.push_back(block->GetDexPc()); + } + DCHECK(block->GetFirstInstruction()->IsNop()); + DCHECK(block->GetFirstInstruction()->AsNop()->NeedsEnvironment()); + HEnvironment* const environment = block->GetFirstInstruction()->GetEnvironment(); + DCHECK(environment != nullptr); + HEnvironment* outer_environment = environment; + while (outer_environment->GetParent() != nullptr) { + outer_environment = outer_environment->GetParent(); + if (kIsDebugBuild) { + dex_pc_list_for_verification.push_back(outer_environment->GetDexPc()); + } + } + + if (kIsDebugBuild) { + // dex_pc_list_for_verification is set from innnermost to outermost. Let's reverse it + // since we are expected to pass from outermost to innermost. + std::reverse(dex_pc_list_for_verification.begin(), dex_pc_list_for_verification.end()); + DCHECK_EQ(dex_pc_list_for_verification.front(), outer_environment->GetDexPc()); + } - stack_map_stream->BeginStackMapEntry(dex_pc, + uint32_t native_pc = GetAddressOf(block); + stack_map_stream->BeginStackMapEntry(outer_environment->GetDexPc(), native_pc, /* register_mask= */ 0, /* sp_mask= */ nullptr, - StackMap::Kind::Catch); - - HInstruction* current_phi = block->GetFirstPhi(); - for (size_t vreg = 0; vreg < num_vregs; ++vreg) { - while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) { - HInstruction* next_phi = current_phi->GetNext(); - DCHECK(next_phi == nullptr || - current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber()) - << "Phis need to be sorted by vreg number to keep this a linear-time loop."; - current_phi = next_phi; - } + StackMap::Kind::Catch, + /* needs_vreg_info= */ true, + dex_pc_list_for_verification); - if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); - } else { - Location location = current_phi->GetLocations()->Out(); - switch (location.GetKind()) { - case Location::kStackSlot: { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); - break; - } - case Location::kDoubleStackSlot: { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize)); - ++vreg; - DCHECK_LT(vreg, num_vregs); - break; - } - default: { - // All catch phis must be allocated to a stack slot. - LOG(FATAL) << "Unexpected kind " << location.GetKind(); - UNREACHABLE(); - } - } - } - } + EmitEnvironment(environment, + /* slow_path= */ nullptr, + /* needs_vreg_info= */ true, + /* is_for_catch_handler= */ true); stack_map_stream->EndStackMapEntry(); } @@ -1390,7 +1396,9 @@ void CodeGenerator::AddSlowPath(SlowPathCode* slow_path) { code_generation_data_->AddSlowPath(slow_path); } -void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path) { +void CodeGenerator::EmitVRegInfo(HEnvironment* environment, + SlowPathCode* slow_path, + bool is_for_catch_handler) { StackMapStream* stack_map_stream = GetStackMapStream(); // Walk over the environment, and record the location of dex registers. 
for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) { @@ -1445,6 +1453,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p } case Location::kRegister : { + DCHECK(!is_for_catch_handler); int id = location.reg(); if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(id)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(id); @@ -1466,6 +1475,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p } case Location::kFpuRegister : { + DCHECK(!is_for_catch_handler); int id = location.reg(); if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(id)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(id); @@ -1487,6 +1497,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p } case Location::kFpuRegisterPair : { + DCHECK(!is_for_catch_handler); int low = location.low(); int high = location.high(); if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(low)) { @@ -1508,6 +1519,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p } case Location::kRegisterPair : { + DCHECK(!is_for_catch_handler); int low = location.low(); int high = location.high(); if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(low)) { @@ -1538,9 +1550,54 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p } } +void CodeGenerator::EmitVRegInfoOnlyCatchPhis(HEnvironment* environment) { + StackMapStream* stack_map_stream = GetStackMapStream(); + DCHECK(environment->GetHolder()->GetBlock()->IsCatchBlock()); + DCHECK_EQ(environment->GetHolder()->GetBlock()->GetFirstInstruction(), environment->GetHolder()); + HInstruction* current_phi = environment->GetHolder()->GetBlock()->GetFirstPhi(); + for (size_t vreg = 0; vreg < environment->Size(); ++vreg) { + while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) { + HInstruction* next_phi = current_phi->GetNext(); + DCHECK(next_phi == nullptr || + current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber()) + << "Phis need to be sorted by vreg number to keep this a linear-time loop."; + current_phi = next_phi; + } + + if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) { + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); + } else { + Location location = current_phi->GetLocations()->Out(); + switch (location.GetKind()) { + case Location::kStackSlot: { + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, + location.GetStackIndex()); + break; + } + case Location::kDoubleStackSlot: { + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, + location.GetStackIndex()); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, + location.GetHighStackIndex(kVRegSize)); + ++vreg; + DCHECK_LT(vreg, environment->Size()); + break; + } + default: { + LOG(FATAL) << "All catch phis must be allocated to a stack slot. 
Unexpected kind " + << location.GetKind(); + UNREACHABLE(); + } + } + } + } +} + void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path, - bool needs_vreg_info) { + bool needs_vreg_info, + bool is_for_catch_handler, + bool innermost_environment) { if (environment == nullptr) return; StackMapStream* stack_map_stream = GetStackMapStream(); @@ -1548,7 +1605,11 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, if (emit_inline_info) { // We emit the parent environment first. - EmitEnvironment(environment->GetParent(), slow_path, needs_vreg_info); + EmitEnvironment(environment->GetParent(), + slow_path, + needs_vreg_info, + is_for_catch_handler, + /* innermost_environment= */ false); stack_map_stream->BeginInlineInfoEntry(environment->GetMethod(), environment->GetDexPc(), needs_vreg_info ? environment->Size() : 0, @@ -1556,9 +1617,13 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, this); } + // If a dex register map is not required we just won't emit it. if (needs_vreg_info) { - // If a dex register map is not required we just won't emit it. - EmitVRegInfo(environment, slow_path); + if (innermost_environment && is_for_catch_handler) { + EmitVRegInfoOnlyCatchPhis(environment); + } else { + EmitVRegInfo(environment, slow_path, is_for_catch_handler); + } } if (emit_inline_info) { @@ -1671,7 +1736,7 @@ void CodeGenerator::ValidateInvokeRuntime(QuickEntrypointEnum entrypoint, // When (non-Baker) read barriers are enabled, some instructions // use a slow path to emit a read barrier, which does not trigger // GC. - (kEmitCompilerReadBarrier && + (gUseReadBarrier && !kUseBakerReadBarrier && (instruction->IsInstanceFieldGet() || instruction->IsPredicatedInstanceFieldGet() || diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index d81a7b5382..9872efaa4a 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -26,6 +26,7 @@ #include "base/bit_utils.h" #include "base/enums.h" #include "base/globals.h" +#include "base/macros.h" #include "base/memory_region.h" #include "class_root.h" #include "dex/string_reference.h" @@ -33,13 +34,15 @@ #include "graph_visualizer.h" #include "locations.h" #include "nodes.h" +#include "oat_quick_method_header.h" #include "optimizing_compiler_stats.h" #include "read_barrier_option.h" #include "stack.h" +#include "subtype_check.h" #include "utils/assembler.h" #include "utils/label.h" -namespace art { +namespace art HIDDEN { // Binary encoding of 2^32 for type double. static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); @@ -56,8 +59,18 @@ static int32_t constexpr kPrimIntMax = 0x7fffffff; // Maximum value for a primitive long. static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff); -static constexpr ReadBarrierOption kCompilerReadBarrierOption = - kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier; +static const ReadBarrierOption gCompilerReadBarrierOption = + gUseReadBarrier ? 
kWithReadBarrier : kWithoutReadBarrier; + +constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); +constexpr size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); +constexpr uint32_t shifted_visibly_initialized_value = + enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); +constexpr uint32_t shifted_initializing_value = + enum_cast<uint32_t>(ClassStatus::kInitializing) << (status_lsb_position % kBitsPerByte); +constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); class Assembler; class CodeGenerator; @@ -291,6 +304,12 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // Returns whether we should split long moves in parallel moves. virtual bool ShouldSplitLongMoves() const { return false; } + // Returns true if `invoke` is an implemented intrinsic in this codegen's arch. + bool IsImplementedIntrinsic(HInvoke* invoke) const { + return invoke->IsIntrinsic() && + !unimplemented_intrinsics_[static_cast<size_t>(invoke->GetIntrinsic())]; + } + size_t GetNumberOfCoreCalleeSaveRegisters() const { return POPCOUNT(core_callee_save_mask_); } @@ -460,7 +479,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // If the target class is in the boot image, it's non-moveable and it doesn't matter // if we compare it with a from-space or to-space reference, the result is the same. // It's OK to traverse a class hierarchy jumping between from-space and to-space. - return kEmitCompilerReadBarrier && !instance_of->GetTargetClass()->IsInBootImage(); + return gUseReadBarrier && !instance_of->GetTargetClass()->IsInBootImage(); } static ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) { @@ -475,7 +494,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { case TypeCheckKind::kArrayObjectCheck: case TypeCheckKind::kInterfaceCheck: { bool needs_read_barrier = - kEmitCompilerReadBarrier && !check_cast->GetTargetClass()->IsInBootImage(); + gUseReadBarrier && !check_cast->GetTargetClass()->IsInBootImage(); // We do not emit read barriers for HCheckCast, so we can get false negatives // and the slow path shall re-check and simply return if the cast is actually OK. return !needs_read_barrier; @@ -678,7 +697,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { return LocationSummary::kCallOnMainOnly; case HLoadString::LoadKind::kJitTableAddress: DCHECK(!load->NeedsEnvironment()); - return kEmitCompilerReadBarrier + return gUseReadBarrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; @@ -736,7 +755,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { uint32_t core_callee_save_mask, uint32_t fpu_callee_save_mask, const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats); + OptimizingCompilerStats* stats, + const art::ArrayRef<const bool>& unimplemented_intrinsics); virtual HGraphVisitor* GetLocationBuilder() = 0; virtual HGraphVisitor* GetInstructionVisitor() = 0; @@ -836,8 +856,11 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { void BlockIfInRegister(Location location, bool is_out = false) const; void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path, - bool needs_vreg_info = true); - void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path); + bool needs_vreg_info = true, + bool is_for_catch_handler = false, + bool innermost_environment = true); + void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path, bool is_for_catch_handler); + void EmitVRegInfoOnlyCatchPhis(HEnvironment* environment); static void PrepareCriticalNativeArgumentMoves( HInvokeStaticOrDirect* invoke, @@ -877,6 +900,9 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed. std::unique_ptr<CodeGenerationData> code_generation_data_; + // Which intrinsics we don't have handcrafted code for. + art::ArrayRef<const bool> unimplemented_intrinsics_; + friend class OptimizingCFITest; ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD); ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 2a0b481b2d..41db9a2542 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -27,7 +27,6 @@ #include "class_root-inl.h" #include "class_table.h" #include "code_generator_utils.h" -#include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" @@ -44,6 +43,7 @@ #include "mirror/var_handle.h" #include "offsets.h" #include "optimizing/common_arm64.h" +#include "optimizing/nodes.h" #include "thread.h" #include "utils/arm64/assembler_arm64.h" #include "utils/assembler.h" @@ -58,7 +58,7 @@ using vixl::EmissionCheckScope; #error "ARM64 Codegen VIXL macro-assembler macro already defined." 
#endif -namespace art { +namespace art HIDDEN { template<class MirrorType> class GcRoot; @@ -77,7 +77,6 @@ using helpers::InputFPRegisterAt; using helpers::InputOperandAt; using helpers::InputRegisterAt; using helpers::Int64FromLocation; -using helpers::IsConstantZeroBitPattern; using helpers::LocationFrom; using helpers::OperandFromMemOperand; using helpers::OutputCPURegister; @@ -583,7 +582,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { obj_(obj), offset_(offset), index_(index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // If `obj` is equal to `out` or `ref`, it means the initial object // has been overwritten by (or after) the heap object reference load // to be instrumented, e.g.: @@ -762,7 +761,7 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) : SlowPathCodeARM64(instruction), out_(out), root_(root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { @@ -825,6 +824,9 @@ class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); + if (instruction_->IsMethodExitHook()) { + __ Mov(vixl::aarch64::x4, arm64_codegen->GetFrameSize()); + } arm64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); @@ -933,6 +935,33 @@ Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const return Location::RegisterLocation(x15.GetCode()); } +namespace detail { +// Mark which intrinsics we don't have handcrafted code for. +template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_ARM64(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +#include "intrinsics_list.h" +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) \ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; +#undef INTRINSICS_LIST + +} // namespace detail + CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) @@ -943,7 +972,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, callee_saved_core_registers.GetList(), callee_saved_fp_registers.GetList(), compiler_options, - stats), + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), location_builder_neon_(graph, this), @@ -1169,9 +1199,21 @@ void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* in new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); + if (instruction->IsMethodExitHook()) { + // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it + // would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check + // if it is just non-zero. 
kCHA bit isn't used in debuggable runtimes as cha optimization is + // disabled in debuggable runtime. The other bit is used when this method itself requires a + // deoptimization due to redefinition. So it is safe to just check for non-zero value here. + __ Ldr(value, MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag())); + __ Cbnz(value, slow_path->GetEntryLabel()); + } + uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()); - int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); - __ Mov(temp, address + offset); + MemberOffset offset = instruction->IsMethodExitHook() ? + instrumentation::Instrumentation::HaveMethodExitListenersOffset() : + instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); + __ Mov(temp, address + offset.Int32Value()); __ Ldrb(value, MemOperand(temp, 0)); __ Cbnz(value, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -1233,6 +1275,54 @@ void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) { void CodeGeneratorARM64::GenerateFrameEntry() { MacroAssembler* masm = GetVIXLAssembler(); + + // Check if we need to generate the clinit check. We will jump to the + // resolution stub if the class is not initialized and the executing thread is + // not the thread initializing it. + // We do this before constructing the frame to get the correct stack trace if + // an exception is thrown. + if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) { + UseScratchRegisterScope temps(masm); + vixl::aarch64::Label resolution; + vixl::aarch64::Label memory_barrier; + + Register temp1 = temps.AcquireW(); + Register temp2 = temps.AcquireW(); + + // Check if we're visibly initialized. + + // We don't emit a read barrier here to save on code size. We rely on the + // resolution trampoline to do a suspend check before re-entering this code. + __ Ldr(temp1, MemOperand(kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value())); + __ Ldrb(temp2, HeapOperand(temp1, status_byte_offset)); + __ Cmp(temp2, shifted_visibly_initialized_value); + __ B(hs, &frame_entry_label_); + + // Check if we're initialized and jump to code that does a memory barrier if + // so. + __ Cmp(temp2, shifted_initialized_value); + __ B(hs, &memory_barrier); + + // Check if we're initializing and the thread initializing is the one + // executing the code. + __ Cmp(temp2, shifted_initializing_value); + __ B(lo, &resolution); + + __ Ldr(temp1, HeapOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value())); + __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArm64PointerSize>().Int32Value())); + __ Cmp(temp1, temp2); + __ B(eq, &frame_entry_label_); + __ Bind(&resolution); + + // Jump to the resolution stub. 
+ ThreadOffset64 entrypoint_offset = + GetThreadOffset<kArm64PointerSize>(kQuickQuickResolutionTrampoline); + __ Ldr(temp1.X(), MemOperand(tr, entrypoint_offset.Int32Value())); + __ Br(temp1.X()); + + __ Bind(&memory_barrier); + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } __ Bind(&frame_entry_label_); bool do_overflow_check = @@ -1364,12 +1454,12 @@ void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* l } } -void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) { +void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool emit_null_check) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register card = temps.AcquireX(); Register temp = temps.AcquireW(); // Index within the CardTable - 32bit. vixl::aarch64::Label done; - if (value_can_be_null) { + if (emit_null_check) { __ Cbz(value, &done); } // Load the address of the card table into `card`. @@ -1391,7 +1481,7 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_ // of the card to mark; and 2. to load the `kCardDirty` value) saves a load // (no need to explicitly load `kCardDirty` as an immediate value). __ Strb(card, MemOperand(card, temp.X())); - if (value_can_be_null) { + if (emit_null_check) { __ Bind(&done); } } @@ -1904,11 +1994,6 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod Register class_reg) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireW(); - constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); - const size_t status_byte_offset = - mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); - constexpr uint32_t shifted_visibly_initialized_value = - enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code @@ -1974,6 +2059,13 @@ bool CodeGeneratorARM64::CanUseImplicitSuspendCheck() const { void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { + if (instruction->IsNoOp()) { + if (successor != nullptr) { + __ B(codegen_->GetLabelOf(successor)); + } + return; + } + if (codegen_->CanUseImplicitSuspendCheck()) { __ Ldr(kImplicitSuspendCheckRegister, MemOperand(kImplicitSuspendCheckRegister)); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); @@ -2051,7 +2143,7 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction, bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_field_get_with_read_barrier @@ -2107,7 +2199,7 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, MemOperand field = HeapOperand(InputRegisterAt(instruction, receiver_input), field_info.GetFieldOffset()); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && + if (gUseReadBarrier && kUseBakerReadBarrier && load_type == DataType::Type::kReference) { // Object FieldGet with Baker's read barrier case. 
// /* HeapReference<Object> */ out = *(base + offset) @@ -2154,9 +2246,10 @@ void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - if (IsConstantZeroBitPattern(instruction->InputAt(1))) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - } else if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) { + HInstruction* value = instruction->InputAt(1); + if (IsZeroBitPattern(value)) { + locations->SetInAt(1, Location::ConstantLocation(value)); + } else if (DataType::IsFloatingPointType(value->GetType())) { locations->SetInAt(1, Location::RequiresFpuRegister()); } else { locations->SetInAt(1, Location::RequiresRegister()); @@ -2165,7 +2258,8 @@ void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) { void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); bool is_predicated = instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet(); @@ -2205,8 +2299,12 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, } } - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - codegen_->MarkGCCard(obj, Register(value), value_can_be_null); + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)) && + write_barrier_kind != WriteBarrierKind::kDontEmit) { + codegen_->MarkGCCard( + obj, + Register(value), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); } if (is_predicated) { @@ -2382,7 +2480,7 @@ void LocationsBuilderARM64::VisitDataProcWithShifterOp( LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); if (instruction->GetInstrKind() == HInstruction::kNeg) { - locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant())); + locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0))); } else { locations->SetInAt(0, Location::RequiresRegister()); } @@ -2475,7 +2573,7 @@ void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIn // data offset constant generation out of the loop and reduce the critical path length in the // loop. locations->SetInAt(1, shift->GetValue() == 0 - ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant()) + ? 
Location::ConstantLocation(instruction->GetOffset()) : Location::RequiresRegister()); locations->SetInAt(2, Location::ConstantLocation(shift)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -2549,7 +2647,7 @@ void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_array_get_with_read_barrier @@ -2605,10 +2703,10 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { // does not support the HIntermediateAddress instruction. DCHECK(!((type == DataType::Type::kReference) && instruction->GetArray()->IsIntermediateAddress() && - kEmitCompilerReadBarrier && + gUseReadBarrier && !kUseBakerReadBarrier)); - if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) { // Object ArrayGet with Baker's read barrier case. // Note that a potential implicit null check is handled in the // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. @@ -2750,9 +2848,10 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { instruction, needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (IsConstantZeroBitPattern(instruction->InputAt(2))) { - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetIndex())); + HInstruction* value = instruction->GetValue(); + if (IsZeroBitPattern(value)) { + locations->SetInAt(2, Location::ConstantLocation(value)); } else if (DataType::IsFloatingPointType(value_type)) { locations->SetInAt(2, Location::RequiresFpuRegister()); } else { @@ -2871,7 +2970,11 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { } } - codegen_->MarkGCCard(array, value.W(), /* value_can_be_null= */ false); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + codegen_->MarkGCCard(array, value.W(), /* emit_null_check= */ false); + } if (can_value_be_null) { DCHECK(do_store.IsLinked()); @@ -2929,10 +3032,10 @@ void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) { HInstruction* length = instruction->InputAt(1); bool both_const = index->IsConstant() && length->IsConstant(); locations->SetInAt(0, both_const - ? Location::ConstantLocation(index->AsConstant()) + ? Location::ConstantLocation(index) : ARM64EncodableConstantOrRegister(index, instruction)); locations->SetInAt(1, both_const - ? Location::ConstantLocation(length->AsConstant()) + ? 
Location::ConstantLocation(length) : ARM64EncodableConstantOrRegister(length, instruction)); } @@ -3030,6 +3133,7 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); DataType::Type in_type = compare->InputAt(0)->GetType(); + HInstruction* rhs = compare->InputAt(1); switch (in_type) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -3039,7 +3143,7 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) { case DataType::Type::kInt32: case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare)); + locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, compare)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -3047,8 +3151,8 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) { case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, - IsFloatingPointZeroConstant(compare->InputAt(1)) - ? Location::ConstantLocation(compare->InputAt(1)->AsConstant()) + IsFloatingPointZeroConstant(rhs) + ? Location::ConstantLocation(rhs) : Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); break; @@ -3096,16 +3200,17 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { void LocationsBuilderARM64::HandleCondition(HCondition* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + HInstruction* rhs = instruction->InputAt(1); if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, - IsFloatingPointZeroConstant(instruction->InputAt(1)) - ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant()) + IsFloatingPointZeroConstant(rhs) + ? Location::ConstantLocation(rhs) : Location::RequiresFpuRegister()); } else { // Integer cases. locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction)); + locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, instruction)); } if (!instruction->IsEmittedAtUseSite()) { @@ -3845,12 +3950,12 @@ void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) { } } -void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetAllocator()) LocationSummary(info); +void LocationsBuilderARM64::VisitNop(HNop* nop) { + new (GetGraph()->GetAllocator()) LocationSummary(nop); } -void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) { - // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. +void InstructionCodeGeneratorARM64::VisitNop(HNop*) { + // The environment recording already happened in CodeGenerator::Compile. } void CodeGeneratorARM64::IncreaseFrame(size_t adjustment) { @@ -3893,12 +3998,15 @@ void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction } void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } // Temp is used for read barrier. 
static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { - if (kEmitCompilerReadBarrier && + if (gUseReadBarrier && (kUseBakerReadBarrier || type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -3948,9 +4056,9 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { } locations->SetInAt(0, Location::RequiresRegister()); if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::RequiresRegister()); } @@ -4194,9 +4302,9 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::RequiresRegister()); } @@ -5313,7 +5421,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { load_kind == HLoadClass::LoadKind::kBssEntryPublic || load_kind == HLoadClass::LoadKind::kBssEntryPackage); - const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; @@ -5327,7 +5435,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { } locations->SetOut(Location::RequiresRegister()); if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -5354,7 +5462,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA const ReadBarrierOption read_barrier_option = cls->IsInBootImage() ? 
kWithoutReadBarrier - : kCompilerReadBarrierOption; + : gCompilerReadBarrierOption; bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -5523,7 +5631,7 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { } else { locations->SetOut(Location::RequiresRegister()); if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -5577,7 +5685,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD temp, /* offset placeholder */ 0u, ldr_label, - kCompilerReadBarrierOption); + gCompilerReadBarrierOption); SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load); codegen_->AddSlowPath(slow_path); @@ -5601,7 +5709,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD out.X(), /* offset= */ 0, /* fixup_label= */ nullptr, - kCompilerReadBarrierOption); + gCompilerReadBarrierOption); return; } default: @@ -6156,7 +6264,10 @@ void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { } void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { @@ -6462,7 +6573,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister( DataType::Type type = DataType::Type::kReference; Register out_reg = RegisterFrom(out, type); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -6503,7 +6614,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters( Register out_reg = RegisterFrom(out, type); Register obj_reg = RegisterFrom(obj, type); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6538,7 +6649,7 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad( DCHECK(fixup_label == nullptr || offset == 0u); Register root_reg = RegisterFrom(root, DataType::Type::kReference); if (read_barrier_option == kWithReadBarrier) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. @@ -6604,7 +6715,7 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad( void CodeGeneratorARM64::GenerateIntrinsicCasMoveWithBakerReadBarrier( vixl::aarch64::Register marked_old_value, vixl::aarch64::Register old_value) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR. 
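A pattern repeats through the hunks above and below: the compile-time constants `kEmitCompilerReadBarrier`/`kCompilerReadBarrierOption` give way to the runtime-initialized `gUseReadBarrier`/`gCompilerReadBarrierOption`, so emitting read barriers becomes a decision taken when the runtime picks its collector at load time rather than when the compiler is built, and debug-only checks are reordered to test `kIsDebugBuild` first so release builds can still fold them away. The sketch below illustrates that trade-off with placeholder names under those assumptions; it is not ART's real declarations.

// Placeholder names only; this mirrors the shape of the change, not ART code.
#include <iostream>
#include <string>

constexpr bool kIsDebugBuildSketch = false;  // Fixed when this binary is built.
constexpr bool kUseBakerSketch = true;       // Still a build-time choice.
bool gUseReadBarrierSketch = false;          // Decided once at startup.

// Stands in for MaybeGenerateMarkingRegisterCheck: putting the build-time
// constant first lets release builds discard the whole test, no matter what
// the runtime flag would have said.
void MaybeEmitMarkingRegisterCheck() {
  if (kIsDebugBuildSketch && gUseReadBarrierSketch && kUseBakerSketch) {
    std::cout << "emit marking-register check\n";
  }
}

// Stands in for the codegen paths that used to test kEmitCompilerReadBarrier:
// both branches now survive compilation and the runtime flag picks one.
void EmitReferenceLoad() {
  if (gUseReadBarrierSketch && kUseBakerSketch) {
    std::cout << "emit Baker read-barrier fast path\n";
  } else {
    std::cout << "emit plain reference load\n";
  }
}

int main(int argc, char** argv) {
  // In ART the flag follows the collector chosen at load time; here a
  // command-line argument stands in for that choice.
  gUseReadBarrierSketch = (argc > 1 && std::string(argv[1]) == "cc");
  MaybeEmitMarkingRegisterCheck();
  EmitReferenceLoad();
  return 0;
}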
@@ -6626,7 +6737,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins const vixl::aarch64::MemOperand& src, bool needs_null_check, bool use_load_acquire) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the @@ -6722,7 +6833,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instru uint32_t data_offset, Location index, bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); static_assert( @@ -6800,7 +6911,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instru void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { // The following condition is a compile-time one, so it does not have a run-time cost. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) { + if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) { // The following condition is a run-time one; it is executed after the // previous compile-time test, to avoid penalizing non-debug builds. if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) { @@ -6829,7 +6940,7 @@ void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the reference load. // @@ -6854,7 +6965,7 @@ void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Baker's read barriers shall be handled by the fast path // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier). DCHECK(!kUseBakerReadBarrier); @@ -6869,7 +6980,7 @@ void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the GC root load. // @@ -7003,6 +7114,7 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, vixl::aarch64::MemOperand& lock_word, vixl::aarch64::Label* slow_path, vixl::aarch64::Label* throw_npe = nullptr) { + vixl::aarch64::Label throw_npe_cont; // Load the lock word containing the rb_state. __ Ldr(ip0.W(), lock_word); // Given the numeric representation, it's enough to check the low bit of the rb_state. @@ -7014,7 +7126,7 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, "Field and array LDR offsets must be the same to reuse the same code."); // To throw NPE, we return to the fast path; the artificial dependence below does not matter. if (throw_npe != nullptr) { - __ Bind(throw_npe); + __ Bind(&throw_npe_cont); } // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning). static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), @@ -7026,6 +7138,12 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, // a memory barrier (which would be more expensive). __ Add(base_reg, base_reg, Operand(ip0, LSR, 32)); __ Br(lr); // And return back to the function. + if (throw_npe != nullptr) { + // Clear IP0 before returning to the fast path. 
+ __ Bind(throw_npe); + __ Mov(ip0.X(), xzr); + __ B(&throw_npe_cont); + } // Note: The fake dependency is unnecessary for the slow path. } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index f4d652c29c..6190364d1d 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_ #include "base/bit_field.h" +#include "base/macros.h" #include "class_root.h" #include "code_generator.h" #include "common_arm64.h" @@ -36,7 +37,7 @@ #include "aarch64/macro-assembler-aarch64.h" #pragma GCC diagnostic pop -namespace art { +namespace art HIDDEN { namespace linker { class Arm64RelativePatcherTest; @@ -92,7 +93,10 @@ const vixl::aarch64::CPURegList runtime_reserved_core_registers = vixl::aarch64::CPURegList( tr, // Reserve X20 as Marking Register when emitting Baker read barriers. - ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) ? mr : vixl::aarch64::NoCPUReg), + // TODO: We don't need to reserve marking-register for userfaultfd GC. But + // that would require some work in the assembler code as the right GC is + // chosen at load-time and not compile time. + (kReserveMarkingRegister ? mr : vixl::aarch64::NoCPUReg), kImplicitSuspendCheckRegister, vixl::aarch64::lr); @@ -111,9 +115,7 @@ inline Location FixedTempLocation() { const vixl::aarch64::CPURegList callee_saved_core_registers( vixl::aarch64::CPURegister::kRegister, vixl::aarch64::kXRegSize, - ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) - ? vixl::aarch64::x21.GetCode() - : vixl::aarch64::x20.GetCode()), + (kReserveMarkingRegister ? vixl::aarch64::x21.GetCode() : vixl::aarch64::x20.GetCode()), vixl::aarch64::x30.GetCode()); const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kVRegister, vixl::aarch64::kDRegSize, @@ -121,6 +123,41 @@ const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegi vixl::aarch64::d15.GetCode()); Location ARM64ReturnLocation(DataType::Type return_type); +#define UNIMPLEMENTED_INTRINSIC_LIST_ARM64(V) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + V(SystemArrayCopyByte) \ + V(SystemArrayCopyInt) \ + /* 1.8 */ \ + V(UnsafeGetAndAddInt) \ + V(UnsafeGetAndAddLong) \ + V(UnsafeGetAndSetInt) \ + V(UnsafeGetAndSetLong) \ + V(UnsafeGetAndSetObject) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) \ + /* OpenJDK 11 */ \ + V(JdkUnsafeGetAndAddInt) \ + V(JdkUnsafeGetAndAddLong) \ + V(JdkUnsafeGetAndSetInt) \ + V(JdkUnsafeGetAndSetLong) \ + V(JdkUnsafeGetAndSetObject) + class SlowPathCodeARM64 : public SlowPathCode { public: explicit SlowPathCodeARM64(HInstruction* instruction) @@ -327,7 +364,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void 
HandleCondition(HCondition* instruction); @@ -615,7 +653,7 @@ class CodeGeneratorARM64 : public CodeGenerator { // Emit a write barrier. void MarkGCCard(vixl::aarch64::Register object, vixl::aarch64::Register value, - bool value_can_be_null); + bool emit_null_check); void GenerateMemoryBarrier(MemBarrierKind kind); diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 09fa598203..d69e77045b 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -26,7 +26,6 @@ #include "class_table.h" #include "code_generator_utils.h" #include "common_arm.h" -#include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "gc/space/image_space.h" @@ -46,7 +45,7 @@ #include "utils/assembler.h" #include "utils/stack_checks.h" -namespace art { +namespace art HIDDEN { namespace arm { namespace vixl32 = vixl::aarch32; @@ -744,7 +743,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { obj_(obj), offset_(offset), index_(index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // If `obj` is equal to `out` or `ref`, it means the initial object // has been overwritten by (or after) the heap object reference load // to be instrumented, e.g.: @@ -922,7 +921,7 @@ class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root) : SlowPathCodeARMVIXL(instruction), out_(out), root_(root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { @@ -974,6 +973,10 @@ class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL { (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook; __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); + if (instruction_->IsMethodExitHook()) { + // Load frame size to pass to the exit hooks + __ Mov(vixl::aarch32::Register(R2), arm_codegen->GetFrameSize()); + } arm_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); @@ -1845,7 +1848,7 @@ static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) { DCHECK(!DataType::IsFloatingPointType(constant->GetType())); if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) { - return Location::ConstantLocation(constant->AsConstant()); + return Location::ConstantLocation(constant); } return Location::RequiresRegister(); @@ -1904,6 +1907,33 @@ vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction, return final_label; } +namespace detail { +// Mark which intrinsics we don't have handcrafted code for. +template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +#include "intrinsics_list.h" +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) 
\ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; +#undef INTRINSICS_LIST + +} // namespace detail + CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) @@ -1914,7 +1944,8 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, kCoreCalleeSaves.GetList(), ComputeSRegisterListMask(kFpuCalleeSaves), compiler_options, - stats), + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), location_builder_(graph, this), @@ -2101,7 +2132,10 @@ void CodeGeneratorARMVIXL::SetupBlockedRegisters() const { blocked_core_registers_[LR] = true; blocked_core_registers_[PC] = true; - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // TODO: We don't need to reserve marking-register for userfaultfd GC. But + // that would require some work in the assembler code as the right GC is + // chosen at load-time and not compile time. + if (kReserveMarkingRegister) { // Reserve marking register. blocked_core_registers_[MR] = true; } @@ -2164,9 +2198,24 @@ void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction); codegen_->AddSlowPath(slow_path); - int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); + if (instruction->IsMethodExitHook()) { + // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it + // would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check + // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is + // disabled in debuggable runtime. The other bit is used when this method itself requires a + // deoptimization due to redefinition. So it is safe to just check for non-zero value here. + GetAssembler()->LoadFromOffset(kLoadWord, + temp, + sp, + codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); + __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); + } + + MemberOffset offset = instruction->IsMethodExitHook() ? + instrumentation::Instrumentation::HaveMethodExitListenersOffset() : + instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation()); - __ Mov(temp, address + offset); + __ Mov(temp, address + offset.Int32Value()); __ Ldrb(temp, MemOperand(temp, 0)); __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -2234,6 +2283,61 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + + // Check if we need to generate the clinit check. We will jump to the + // resolution stub if the class is not initialized and the executing thread is + // not the thread initializing it. + // We do this before constructing the frame to get the correct stack trace if + // an exception is thrown. 
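The detail::IsUnimplemented specializations above turn the per-backend UNIMPLEMENTED_INTRINSIC_LIST_* macro into a constexpr bool table indexed by the Intrinsics enum, which the code generator constructor then passes to its base class as an ArrayRef. The following compressed sketch shows the same template-specialization trick in isolation; the enum values, the demo lists, and the macro names ending in _DEMO are invented for illustration and are far shorter than the real intrinsic list.

#include <cstdio>
#include <iterator>

enum class Intrinsics { kNone, kMathSqrt, kStringLength, kCRC32Update };

// Per-backend list of intrinsics with no handcrafted code (illustrative only).
#define UNIMPLEMENTED_INTRINSIC_LIST_DEMO(V) \
  V(CRC32Update)

namespace detail {
// Default: assume an intrinsic has a handcrafted implementation.
template <Intrinsics T>
struct IsUnimplemented {
  bool is_unimplemented = false;
};

// Specialize to true for every entry in the backend's "unimplemented" list.
#define TRUE_OVERRIDE(Name)                      \
  template <>                                    \
  struct IsUnimplemented<Intrinsics::k##Name> {  \
    bool is_unimplemented = true;                \
  };
UNIMPLEMENTED_INTRINSIC_LIST_DEMO(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE

// Expand the full intrinsic list into a lookup table, one bool per enum value.
#define INTRINSICS_LIST_DEMO(V) V(MathSqrt) V(StringLength) V(CRC32Update)
#define IS_UNIMPLEMENTED(Name) \
  IsUnimplemented<Intrinsics::k##Name>().is_unimplemented,
static constexpr bool kIsIntrinsicUnimplemented[] = {
    false,  // kNone
    INTRINSICS_LIST_DEMO(IS_UNIMPLEMENTED)
};
#undef IS_UNIMPLEMENTED
}  // namespace detail

int main() {
  for (std::size_t i = 0; i < std::size(detail::kIsIntrinsicUnimplemented); ++i) {
    std::printf("intrinsic %zu unimplemented: %d\n", i,
                detail::kIsIntrinsicUnimplemented[i]);
  }
}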
+ if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Label resolution; + vixl32::Label memory_barrier; + + // Check if we're visibly initialized. + + vixl32::Register temp1 = temps.Acquire(); + // Use r4 as other temporary register. + DCHECK(!blocked_core_registers_[R4]); + DCHECK(!kCoreCalleeSaves.Includes(r4)); + vixl32::Register temp2 = r4; + for (vixl32::Register reg : kParameterCoreRegistersVIXL) { + DCHECK(!reg.Is(r4)); + } + + // We don't emit a read barrier here to save on code size. We rely on the + // resolution trampoline to do a suspend check before re-entering this code. + __ Ldr(temp1, MemOperand(kMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value())); + __ Ldrb(temp2, MemOperand(temp1, status_byte_offset)); + __ Cmp(temp2, shifted_visibly_initialized_value); + __ B(cs, &frame_entry_label_); + + // Check if we're initialized and jump to code that does a memory barrier if + // so. + __ Cmp(temp2, shifted_initialized_value); + __ B(cs, &memory_barrier); + + // Check if we're initializing and the thread initializing is the one + // executing the code. + __ Cmp(temp2, shifted_initializing_value); + __ B(lo, &resolution); + + __ Ldr(temp1, MemOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value())); + __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArmPointerSize>().Int32Value())); + __ Cmp(temp1, temp2); + __ B(eq, &frame_entry_label_); + __ Bind(&resolution); + + // Jump to the resolution stub. + ThreadOffset32 entrypoint_offset = + GetThreadOffset<kArmPointerSize>(kQuickQuickResolutionTrampoline); + __ Ldr(temp1, MemOperand(tr, entrypoint_offset.Int32Value())); + __ Bx(temp1); + + __ Bind(&memory_barrier); + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + __ Bind(&frame_entry_label_); if (HasEmptyFrame()) { @@ -3069,12 +3173,12 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { } } -void LocationsBuilderARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetAllocator()) LocationSummary(info); +void LocationsBuilderARMVIXL::VisitNop(HNop* nop) { + new (GetGraph()->GetAllocator()) LocationSummary(nop); } -void InstructionCodeGeneratorARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo*) { - // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. +void InstructionCodeGeneratorARMVIXL::VisitNop(HNop*) { + // The environment recording already happened in CodeGenerator::Compile. } void CodeGeneratorARMVIXL::IncreaseFrame(size_t adjustment) { @@ -4514,10 +4618,11 @@ void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) { switch (div->GetResultType()) { case DataType::Type::kInt32: { - if (div->InputAt(1)->IsConstant()) { + HInstruction* divisor = div->InputAt(1); + if (divisor->IsConstant()) { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant())); - int32_t value = Int32ConstantFrom(div->InputAt(1)); + locations->SetInAt(1, Location::ConstantLocation(divisor)); + int32_t value = Int32ConstantFrom(divisor); Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap; if (value == 1 || value == 0 || value == -1) { // No temp register required. 
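The frame-entry check added above branches on the class status read from the declaring class: a visibly initialized class falls straight through to the frame entry, an initialized but not yet visibly initialized class takes a memory barrier first, a class currently being initialized is allowed through only for the initializing thread, and everything else jumps to the resolution stub. Here is a plain-C++ sketch of that decision ladder, assuming only that the status values are ordered so the unsigned threshold comparisons work the way the generated cmp/branch sequence uses them; the enum names and numeric ordering are placeholders, not ART's exact definitions.

#include <cstdint>
#include <cstdio>

// Placeholder ordering; the generated code relies on the real class status
// values being ordered so simple unsigned >= / < comparisons are enough.
enum class Status : uint8_t {
  kOther = 0, kInitializing = 1, kInitialized = 2, kVisiblyInitialized = 3
};

enum class Action { kRunMethod, kMemoryBarrierThenRun, kResolutionStub };

Action DecideAtFrameEntry(Status status,
                          uint32_t clinit_thread_id,
                          uint32_t self_thread_id) {
  if (status >= Status::kVisiblyInitialized) {
    return Action::kRunMethod;             // b.cs frame_entry_label
  }
  if (status >= Status::kInitialized) {
    return Action::kMemoryBarrierThenRun;  // b.cs memory_barrier
  }
  if (status < Status::kInitializing) {
    return Action::kResolutionStub;        // b.lo resolution
  }
  // Initializing: only the thread running the clinit may enter the method.
  return (clinit_thread_id == self_thread_id) ? Action::kRunMethod
                                              : Action::kResolutionStub;
}

int main() {
  std::printf("%d\n", static_cast<int>(DecideAtFrameEntry(Status::kInitializing, 42, 42)));
}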
@@ -4631,10 +4736,11 @@ void LocationsBuilderARMVIXL::VisitRem(HRem* rem) { switch (type) { case DataType::Type::kInt32: { - if (rem->InputAt(1)->IsConstant()) { + HInstruction* divisor = rem->InputAt(1); + if (divisor->IsConstant()) { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant())); - int32_t value = Int32ConstantFrom(rem->InputAt(1)); + locations->SetInAt(1, Location::ConstantLocation(divisor)); + int32_t value = Int32ConstantFrom(divisor); Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap; if (value == 1 || value == 0 || value == -1) { // No temp register required. @@ -5187,17 +5293,18 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { void LocationsBuilderARMVIXL::VisitRor(HRor* ror) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall); + HInstruction* shift = ror->InputAt(1); switch (ror->GetResultType()) { case DataType::Type::kInt32: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1))); + locations->SetInAt(1, Location::RegisterOrConstant(shift)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); - if (ror->InputAt(1)->IsConstant()) { - locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant())); + if (shift->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(shift)); } else { locations->SetInAt(1, Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); @@ -5234,11 +5341,12 @@ void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall); + HInstruction* shift = op->InputAt(1); switch (op->GetResultType()) { case DataType::Type::kInt32: { locations->SetInAt(0, Location::RequiresRegister()); - if (op->InputAt(1)->IsConstant()) { - locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant())); + if (shift->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(shift)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } else { locations->SetInAt(1, Location::RequiresRegister()); @@ -5250,8 +5358,8 @@ void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) { } case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); - if (op->InputAt(1)->IsConstant()) { - locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant())); + if (shift->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(shift)); // For simplicity, use kOutputOverlap even though we only require that low registers // don't clash with high registers which the register allocator currently guarantees. 
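Many hunks in these files shorten Location::ConstantLocation(x->AsConstant()) to Location::ConstantLocation(x), i.e. the location factory now accepts the instruction directly once the call site has established it is a constant. The sketch below shows that kind of convenience overload with stand-in class shapes (they are not the real HInstruction/Location definitions); the point is only that the checked downcast moves into one place and every call site loses the repeated ->AsConstant().

#include <cassert>
#include <cstdio>

struct HConstant;

struct HInstruction {
  virtual ~HInstruction() = default;
  virtual bool IsConstant() const { return false; }
  virtual const HConstant* AsConstant() const { return nullptr; }
};

struct HConstant : HInstruction {
  bool IsConstant() const override { return true; }
  const HConstant* AsConstant() const override { return this; }
};

struct Location {
  // Original shape: every call site had to write instr->AsConstant() itself.
  static Location ConstantLocation(const HConstant* constant) {
    return Location{constant};
  }
  // Convenience overload in the spirit of the diff: take the instruction and
  // perform the checked downcast once, here.
  static Location ConstantLocation(const HInstruction* instruction) {
    assert(instruction->IsConstant());
    return Location{instruction->AsConstant()};
  }
  const HConstant* constant;
};

int main() {
  HConstant c;
  HInstruction* input = &c;
  Location loc = Location::ConstantLocation(input);  // no ->AsConstant() at the call site
  std::printf("%p\n", static_cast<const void*>(loc.constant));
}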
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -5727,8 +5835,9 @@ void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register a __ CompareAndBranchIfNonZero(temp1, &fail); } -void LocationsBuilderARMVIXL::HandleFieldSet( - HInstruction* instruction, const FieldInfo& field_info) { +void LocationsBuilderARMVIXL::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = @@ -5751,8 +5860,12 @@ void LocationsBuilderARMVIXL::HandleFieldSet( // Temporary registers for the write barrier. // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark. if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. - locations->AddTemp(Location::RequiresRegister()); + if (write_barrier_kind != WriteBarrierKind::kDontEmit) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } else if (kPoisonHeapReferences) { + locations->AddTemp(Location::RequiresRegister()); + } } else if (generate_volatile) { // ARM encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive @@ -5773,7 +5886,8 @@ void LocationsBuilderARMVIXL::HandleFieldSet( void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = instruction->GetLocations(); @@ -5889,10 +6003,16 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, UNREACHABLE(); } - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)) && + write_barrier_kind != WriteBarrierKind::kDontEmit) { vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); vixl32::Register card = RegisterFrom(locations->GetTemp(1)); - codegen_->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null); + codegen_->MarkGCCard( + temp, + card, + base, + RegisterFrom(value), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); } if (is_volatile) { @@ -5911,7 +6031,7 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, instruction->IsPredicatedInstanceFieldGet()); bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference); + gUseReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference); bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, @@ -5975,7 +6095,7 @@ Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* inpu DCHECK(DataType::IsFloatingPointType(input->GetType())) << input->GetType(); if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) || (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) { - return Location::ConstantLocation(input->AsConstant()); + return Location::ConstantLocation(input); } else { return Location::RequiresFpuRegister(); } @@ -5986,7 +6106,7 @@ Location 
LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* c DCHECK(!DataType::IsFloatingPointType(constant->GetType())); if (constant->IsConstant() && CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) { - return Location::ConstantLocation(constant->AsConstant()); + return Location::ConstantLocation(constant); } return Location::RequiresRegister(); } @@ -6082,7 +6202,7 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, case DataType::Type::kReference: { // /* HeapReference<Object> */ out = *(base + offset) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location(); // Note that a potential implicit null check is handled in this // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call. @@ -6165,11 +6285,14 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, } void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind()); } void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { @@ -6202,11 +6325,14 @@ void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instr } void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind()); } void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { @@ -6386,7 +6512,7 @@ void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type, void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_array_get_with_read_barrier @@ -6534,14 +6660,14 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { // The read barrier instrumentation of object ArrayGet // instructions does not support the HIntermediateAddress // instruction. 
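HandleFieldSet now threads a WriteBarrierKind from the HIR node through both the locations builder and the code generator: when the kind recorded on the store is kDontEmit, the card mark and its two temporary registers are skipped entirely (only a poisoning temp is kept if heap poisoning is on), and kEmitNoNullCheck keeps the card mark but drops the null check around it. A hedged sketch of that decision follows; the boolean parameters and printed "instructions" are simplified stand-ins for the real code-generator plumbing.

#include <cstdio>

enum class WriteBarrierKind { kEmitWithNullCheck, kEmitNoNullCheck, kDontEmit };

// Simplified model of the tail of a reference field store: whether to mark
// the GC card at all, and whether the card mark is guarded by a null check
// on the stored value.
void EmitStoreTail(bool store_needs_write_barrier,
                   bool value_can_be_null,
                   WriteBarrierKind kind) {
  if (!store_needs_write_barrier || kind == WriteBarrierKind::kDontEmit) {
    std::printf("store only, no card mark\n");
    return;
  }
  bool emit_null_check =
      value_can_be_null && kind == WriteBarrierKind::kEmitWithNullCheck;
  if (emit_null_check) {
    std::printf("cbz value, done\n");
  }
  std::printf("mark GC card for the holder object\n");
  if (emit_null_check) {
    std::printf("done:\n");
  }
}

int main() {
  EmitStoreTail(true, true, WriteBarrierKind::kEmitWithNullCheck);
  EmitStoreTail(true, true, WriteBarrierKind::kDontEmit);
}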
- DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); + DCHECK(!(has_intermediate_address && gUseReadBarrier)); static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call. DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); @@ -6688,8 +6814,10 @@ void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) { locations->SetInAt(2, Location::RequiresRegister()); } if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. + // Temporary registers for the write barrier or register poisoning. + // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of + // InstructionCodeGeneratorARMVIXL::VisitArraySet. + locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); } } @@ -6841,7 +6969,11 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { } } - codegen_->MarkGCCard(temp1, temp2, array, value, /* value_can_be_null= */ false); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + codegen_->MarkGCCard(temp1, temp2, array, value, /* emit_null_check= */ false); + } if (can_value_be_null) { DCHECK(do_store.IsReferenced()); @@ -7025,10 +7157,10 @@ void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) { // locations. bool both_const = index->IsConstant() && length->IsConstant(); locations->SetInAt(0, both_const - ? Location::ConstantLocation(index->AsConstant()) + ? Location::ConstantLocation(index) : ArmEncodableConstantOrRegister(index, CMP)); locations->SetInAt(1, both_const - ? Location::ConstantLocation(length->AsConstant()) + ? Location::ConstantLocation(length) : ArmEncodableConstantOrRegister(length, CMP)); } @@ -7072,9 +7204,9 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, vixl32::Register card, vixl32::Register object, vixl32::Register value, - bool value_can_be_null) { + bool emit_null_check) { vixl32::Label is_null; - if (value_can_be_null) { + if (emit_null_check) { __ CompareAndBranchIfZero(value, &is_null, /* is_far_target=*/ false); } // Load the address of the card table into `card`. @@ -7097,7 +7229,7 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, // of the card to mark; and 2. to load the `kCardDirty` value) saves a load // (no need to explicitly load `kCardDirty` as an immediate value). 
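MarkGCCard itself only changes its parameter name (value_can_be_null becomes emit_null_check, since the caller now decides whether the guard is wanted); the body still computes card_table_base + (object >> card_shift) and stores a single byte there, reusing the card register both as the table base and as the value written so no separate kCardDirty immediate is needed, as the diff's comment notes. A portable C++ model of that card-marking computation is below; the shift amount and dirty value are illustrative assumptions, not the runtime's actual constants.

#include <cstdint>
#include <cstdio>

// Illustrative constants; the real card size and dirty value are defined by
// the runtime's card table implementation.
constexpr uintptr_t kCardShift = 10;   // assume one card per 1 KiB of heap
constexpr uint8_t kCardDirty = 0x70;   // placeholder dirty marker

// Model of the write barrier: card_table[obj >> kCardShift] = dirty.
void MarkGCCard(uint8_t* card_table_base, uintptr_t object_address) {
  card_table_base[object_address >> kCardShift] = kCardDirty;
}

int main() {
  static uint8_t card_table[1 << 16] = {};
  MarkGCCard(card_table, 0x345678u);  // index stays in range for the demo
  std::printf("card %u marked\n", static_cast<unsigned>(0x345678u >> kCardShift));
}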
__ Strb(card, MemOperand(card, temp)); - if (value_can_be_null) { + if (emit_null_check) { __ Bind(&is_null); } } @@ -7459,7 +7591,7 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { load_kind == HLoadClass::LoadKind::kBssEntryPublic || load_kind == HLoadClass::LoadKind::kBssEntryPackage); - const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; @@ -7473,7 +7605,7 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { } locations->SetOut(Location::RequiresRegister()); if (load_kind == HLoadClass::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -7501,7 +7633,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ const ReadBarrierOption read_barrier_option = cls->IsInBootImage() ? kWithoutReadBarrier - : kCompilerReadBarrierOption; + : gCompilerReadBarrierOption; bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -7622,12 +7754,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) { UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); - constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); - constexpr uint32_t shifted_visibly_initialized_value = - enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << status_lsb_position; - - const size_t status_offset = mirror::Class::StatusOffset().SizeValue(); - GetAssembler()->LoadFromOffset(kLoadWord, temp, class_reg, status_offset); + __ Ldrb(temp, MemOperand(class_reg, status_byte_offset)); __ Cmp(temp, shifted_visibly_initialized_value); __ B(lo, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -7721,7 +7848,7 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { } else { locations->SetOut(Location::RequiresRegister()); if (load_kind == HLoadString::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need, including temps. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -7760,7 +7887,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE codegen_->EmitMovwMovtPlaceholder(labels, out); // All aligned loads are implicitly atomic consume operations on ARM. 
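GenerateClassInitializationCheck is also simplified: instead of recomputing the shifted status constants and loading the whole 32-bit status word, it now issues a single Ldrb at status_byte_offset and compares against shifted_visibly_initialized_value. The sketch below models why a one-byte load is enough, assuming (as the old code's shift computation suggests) that the class status occupies the top bits of the 32-bit status word; the bit position and status value used here are placeholders, not ART's exact constants.

#include <cstdint>
#include <cstdio>

// Assumption for the sketch: the status is packed into the high bits of a
// 32-bit word, so only the most significant byte matters for the compare.
constexpr uint32_t kStatusLsb = 28;            // placeholder bit position
constexpr uint32_t kVisiblyInitialized = 0xF;  // placeholder status value
constexpr uint32_t kShiftedVisiblyInitialized =
    kVisiblyInitialized << (kStatusLsb - 24);  // value as seen in the top byte

inline bool IsVisiblyInitialized(uint32_t status_word) {
  // Equivalent of the single byte load at status_byte_offset on a
  // little-endian target: take the high byte and compare unsigned.
  uint8_t status_byte = static_cast<uint8_t>(status_word >> 24);
  return status_byte >= kShiftedVisiblyInitialized;
}

int main() {
  std::printf("%d\n", IsVisiblyInitialized(0xF0000000u));  // visibly initialized
  std::printf("%d\n", IsVisiblyInitialized(0x10000000u));  // not yet
}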
codegen_->GenerateGcRootFieldLoad( - load, out_loc, out, /*offset=*/ 0, kCompilerReadBarrierOption); + load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption); LoadStringSlowPathARMVIXL* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load); codegen_->AddSlowPath(slow_path); @@ -7781,7 +7908,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE load->GetString())); // /* GcRoot<mirror::String> */ out = *out codegen_->GenerateGcRootFieldLoad( - load, out_loc, out, /*offset=*/ 0, kCompilerReadBarrierOption); + load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption); return; } default: @@ -7838,7 +7965,7 @@ void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) { // Temp is used for read barrier. static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { - if (kEmitCompilerReadBarrier && + if (gUseReadBarrier && (kUseBakerReadBarrier || type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -7888,9 +8015,9 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { } locations->SetInAt(0, Location::RequiresRegister()); if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::RequiresRegister()); } @@ -8185,9 +8312,9 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) { new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::RequiresRegister()); } @@ -8773,7 +8900,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister( ReadBarrierOption read_barrier_option) { vixl32::Register out_reg = RegisterFrom(out); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); DCHECK(maybe_temp.IsRegister()) << maybe_temp; if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. @@ -8808,7 +8935,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters( vixl32::Register out_reg = RegisterFrom(out); vixl32::Register obj_reg = RegisterFrom(obj); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { DCHECK(maybe_temp.IsRegister()) << maybe_temp; // Load with fast path based Baker's read barrier. 
@@ -8837,7 +8964,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad( ReadBarrierOption read_barrier_option) { vixl32::Register root_reg = RegisterFrom(root); if (read_barrier_option == kWithReadBarrier) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. @@ -8901,7 +9028,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad( void CodeGeneratorARMVIXL::GenerateIntrinsicCasMoveWithBakerReadBarrier( vixl::aarch32::Register marked_old_value, vixl::aarch32::Register old_value) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR. @@ -8935,7 +9062,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i vixl32::Register obj, const vixl32::MemOperand& src, bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the @@ -9028,7 +9155,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref, Location index, Location temp, bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); static_assert( @@ -9094,7 +9221,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref, void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { // The following condition is a compile-time one, so it does not have a run-time cost. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) { + if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) { // The following condition is a run-time one; it is executed after the // previous compile-time test, to avoid penalizing non-debug builds. if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) { @@ -9124,7 +9251,7 @@ void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the reference load. // @@ -9150,7 +9277,7 @@ void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instructio Location obj, uint32_t offset, Location index) { - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Baker's read barriers shall be handled by the fast path // (CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier). DCHECK(!kUseBakerReadBarrier); @@ -9165,7 +9292,7 @@ void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instructio void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the GC root load. 
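In both backends, MaybeGenerateMarkingRegisterCheck now tests kIsDebugBuild before gUseReadBarrier: with the read-barrier switch no longer a compile-time constant, leading with the constant lets release builds fold the entire condition away, and the related register-reservation sites switch to a single kReserveMarkingRegister constant instead of re-deriving the decision from the read-barrier flags. A small sketch of the short-circuit ordering, using stand-in flags rather than the real ones:

#include <cstdio>

static constexpr bool kIsDebugBuild = false;  // compile-time constant
static bool gUseReadBarrier = true;           // decided at startup, not at compile time
static constexpr bool kUseBakerReadBarrier = true;

void MaybeGenerateMarkingRegisterCheck() {
  // Constant first: in release builds the whole branch folds away, so the
  // runtime flag is never even read on this path.
  if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) {
    std::printf("emit marking-register verification code\n");
  }
}

int main() {
  MaybeGenerateMarkingRegisterCheck();
  std::printf("done\n");
}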
// diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 790ad0f8f7..f5abe6951a 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_ #include "base/enums.h" +#include "base/macros.h" #include "class_root.h" #include "code_generator.h" #include "common_arm.h" @@ -36,7 +37,7 @@ #include "aarch32/macro-assembler-aarch32.h" #pragma GCC diagnostic pop -namespace art { +namespace art HIDDEN { namespace linker { class Thumb2RelativePatcherTest; @@ -84,7 +85,7 @@ static const vixl::aarch32::RegisterList kCoreCalleeSaves = vixl::aarch32::Regis vixl::aarch32::r6, vixl::aarch32::r7), // Do not consider r8 as a callee-save register with Baker read barriers. - ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) + (kReserveMarkingRegister ? vixl::aarch32::RegisterList() : vixl::aarch32::RegisterList(vixl::aarch32::r8)), vixl::aarch32::RegisterList(vixl::aarch32::r10, @@ -118,6 +119,65 @@ class CodeGeneratorARMVIXL; using VIXLInt32Literal = vixl::aarch32::Literal<int32_t>; using VIXLUInt32Literal = vixl::aarch32::Literal<uint32_t>; +#define UNIMPLEMENTED_INTRINSIC_LIST_ARM(V) \ + V(MathRoundDouble) /* Could be done by changing rounding mode, maybe? */ \ + V(UnsafeCASLong) /* High register pressure */ \ + V(SystemArrayCopyChar) \ + V(LongDivideUnsigned) \ + V(CRC32Update) \ + V(CRC32UpdateBytes) \ + V(CRC32UpdateByteBuffer) \ + V(FP16ToFloat) \ + V(FP16ToHalf) \ + V(FP16Floor) \ + V(FP16Ceil) \ + V(FP16Rint) \ + V(FP16Greater) \ + V(FP16GreaterEquals) \ + V(FP16Less) \ + V(FP16LessEquals) \ + V(FP16Compare) \ + V(FP16Min) \ + V(FP16Max) \ + V(MathMultiplyHigh) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + V(SystemArrayCopyByte) \ + V(SystemArrayCopyInt) \ + /* 1.8 */ \ + V(MathFmaDouble) \ + V(MathFmaFloat) \ + V(UnsafeGetAndAddInt) \ + V(UnsafeGetAndAddLong) \ + V(UnsafeGetAndSetInt) \ + V(UnsafeGetAndSetLong) \ + V(UnsafeGetAndSetObject) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) \ + /* OpenJDK 11 */ \ + V(JdkUnsafeCASLong) /* High register pressure */ \ + V(JdkUnsafeGetAndAddInt) \ + V(JdkUnsafeGetAndAddLong) \ + V(JdkUnsafeGetAndSetInt) \ + V(JdkUnsafeGetAndSetLong) \ + V(JdkUnsafeGetAndSetObject) \ + V(JdkUnsafeCompareAndSetLong) + class JumpTableARMVIXL : public DeletableArenaObject<kArenaAllocSwitchTable> { public: explicit JumpTableARMVIXL(HPackedSwitch* switch_instr) @@ -309,7 +369,9 @@ class LocationsBuilderARMVIXL : public HGraphVisitor { void HandleIntegerRotate(LocationSummary* locations); void HandleLongRotate(LocationSummary* locations); void HandleShift(HBinaryOperation* operation); - void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); Location ArithmeticZeroOrFpuRegister(HInstruction* 
input); @@ -378,7 +440,8 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void GenerateMinMaxInt(LocationSummary* locations, bool is_min); @@ -542,7 +605,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Register card, vixl::aarch32::Register object, vixl::aarch32::Register value, - bool value_can_be_null); + bool emit_null_check); void GenerateMemoryBarrier(MemBarrierKind kind); @@ -602,7 +665,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator { struct PcRelativePatchInfo { PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx) : target_dex_file(dex_file), offset_or_index(off_or_idx) { } - PcRelativePatchInfo(PcRelativePatchInfo&& other) = default; // Target dex file or null for .data.bmig.rel.ro patches. const DexFile* target_dex_file; diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h new file mode 100644 index 0000000000..405b39aa0a --- /dev/null +++ b/compiler/optimizing/code_generator_riscv64.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ +#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ + +#include "code_generator.h" +#include "driver/compiler_options.h" + +#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc index abec26464a..99805928e4 100644 --- a/compiler/optimizing/code_generator_utils.cc +++ b/compiler/optimizing/code_generator_utils.cc @@ -20,7 +20,7 @@ #include "nodes.h" -namespace art { +namespace art HIDDEN { void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift) { diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h index 64665adc15..9d9ab2b118 100644 --- a/compiler/optimizing/code_generator_utils.h +++ b/compiler/optimizing/code_generator_utils.h @@ -21,7 +21,9 @@ #include <cstdlib> #include <limits> -namespace art { +#include "base/macros.h" + +namespace art HIDDEN { class HInstruction; diff --git a/compiler/optimizing/code_generator_vector_arm64_neon.cc b/compiler/optimizing/code_generator_vector_arm64_neon.cc index 0fe9898635..6b6e25cf0c 100644 --- a/compiler/optimizing/code_generator_vector_arm64_neon.cc +++ b/compiler/optimizing/code_generator_vector_arm64_neon.cc @@ -23,7 +23,7 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) -namespace art { +namespace art HIDDEN { namespace arm64 { using helpers::DRegisterFrom; @@ -65,7 +65,7 @@ inline Location NEONEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { if (constant->IsConstant() && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { - return Location::ConstantLocation(constant->AsConstant()); + return Location::ConstantLocation(constant); } return Location::RequiresRegister(); @@ -94,7 +94,7 @@ void LocationsBuilderARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* ins case DataType::Type::kFloat64: if (input->IsConstant() && NEONCanEncodeConstantAsImmediate(input->AsConstant(), instruction)) { - locations->SetInAt(0, Location::ConstantLocation(input->AsConstant())); + locations->SetInAt(0, Location::ConstantLocation(input)); locations->SetOut(Location::RequiresFpuRegister()); } else { locations->SetInAt(0, Location::RequiresFpuRegister()); @@ -881,7 +881,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati case DataType::Type::kInt32: case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: @@ -1008,13 +1008,13 @@ void LocationsBuilderARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) case DataType::Type::kInt16: case DataType::Type::kInt32: case DataType::Type::kInt64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? 
Location::ConstantLocation(input) : Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; diff --git a/compiler/optimizing/code_generator_vector_arm64_sve.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc index 824b6c9476..fe15791d3f 100644 --- a/compiler/optimizing/code_generator_vector_arm64_sve.cc +++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc @@ -23,17 +23,14 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) -namespace art { +namespace art HIDDEN { namespace arm64 { using helpers::DRegisterFrom; -using helpers::HeapOperand; using helpers::InputRegisterAt; using helpers::Int64FromLocation; using helpers::LocationFrom; using helpers::OutputRegister; -using helpers::QRegisterFrom; -using helpers::StackOperandFrom; using helpers::SveStackOperandFrom; using helpers::VRegisterFrom; using helpers::ZRegisterFrom; @@ -67,7 +64,7 @@ static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* i inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { if (constant->IsConstant() && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { - return Location::ConstantLocation(constant->AsConstant()); + return Location::ConstantLocation(constant); } return Location::RequiresRegister(); @@ -96,7 +93,7 @@ void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* inst case DataType::Type::kFloat64: if (input->IsConstant() && SVECanEncodeConstantAsImmediate(input->AsConstant(), instruction)) { - locations->SetInAt(0, Location::ConstantLocation(input->AsConstant())); + locations->SetInAt(0, Location::ConstantLocation(input)); locations->SetOut(Location::RequiresFpuRegister()); } else { locations->SetInAt(0, Location::RequiresFpuRegister()); @@ -754,7 +751,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati case DataType::Type::kInt32: case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: @@ -878,13 +875,13 @@ void LocationsBuilderARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) { case DataType::Type::kInt16: case DataType::Type::kInt32: case DataType::Type::kInt64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? 
Location::ConstantLocation(input) : Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index c46f9b7986..e8ecf28386 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -20,7 +20,7 @@ namespace vixl32 = vixl::aarch32; using namespace vixl32; // NOLINT(build/namespaces) -namespace art { +namespace art HIDDEN { namespace arm { using helpers::DRegisterFrom; @@ -640,7 +640,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati case DataType::Type::kInt16: case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: @@ -749,7 +749,7 @@ void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { switch (instruction->GetPackedType()) { case DataType::Type::kInt32: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 9c837dd986..343a6e1af4 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -19,7 +19,7 @@ #include "mirror/array-inl.h" #include "mirror/string.h" -namespace art { +namespace art HIDDEN { namespace x86 { // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. @@ -42,13 +42,13 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi case DataType::Type::kUint16: case DataType::Type::kInt16: case DataType::Type::kInt32: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresFpuRegister()); locations->SetOut(is_zero ? Location::RequiresFpuRegister() : Location::SameAsFirstInput()); @@ -981,7 +981,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati case DataType::Type::kInt32: case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; default: @@ -1094,13 +1094,13 @@ void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) { case DataType::Type::kUint16: case DataType::Type::kInt16: case DataType::Type::kInt32: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? 
Location::ConstantLocation(input) : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 330bf76a4a..fb6e4e753f 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -19,7 +19,7 @@ #include "mirror/array-inl.h" #include "mirror/string.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. @@ -37,13 +37,13 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru case DataType::Type::kInt16: case DataType::Type::kInt32: case DataType::Type::kInt64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresFpuRegister()); locations->SetOut(is_zero ? Location::RequiresFpuRegister() : Location::SameAsFirstInput()); @@ -964,7 +964,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati case DataType::Type::kInt32: case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; default: @@ -1072,13 +1072,13 @@ void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { case DataType::Type::kInt16: case DataType::Type::kInt32: case DataType::Type::kInt64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? 
Location::ConstantLocation(input) : Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 8c6b8027cd..cb1cecc45a 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -20,7 +20,6 @@ #include "art_method-inl.h" #include "class_table.h" #include "code_generator_utils.h" -#include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" @@ -36,6 +35,7 @@ #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "mirror/var_handle.h" +#include "optimizing/nodes.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/assembler.h" @@ -43,7 +43,7 @@ #include "utils/x86/assembler_x86.h" #include "utils/x86/managed_register_x86.h" -namespace art { +namespace art HIDDEN { template<class MirrorType> class GcRoot; @@ -503,7 +503,7 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { : SlowPathCode(instruction), ref_(ref), unpoison_ref_before_marking_(unpoison_ref_before_marking) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; } @@ -590,7 +590,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode { field_addr_(field_addr), unpoison_ref_before_marking_(unpoison_ref_before_marking), temp_(temp) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; } @@ -744,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { obj_(obj), offset_(offset), index_(index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // If `obj` is equal to `out` or `ref`, it means the initial object // has been overwritten by (or after) the heap object reference load // to be instrumented, e.g.: @@ -918,7 +918,7 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode { public: ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root) : SlowPathCode(instruction), out_(out), root_(root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { @@ -967,6 +967,9 @@ class MethodEntryExitHooksSlowPathX86 : public SlowPathCode { (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook; __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); + if (instruction_->IsMethodExitHook()) { + __ movl(EBX, Immediate(codegen->GetFrameSize())); + } x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); @@ -1103,6 +1106,33 @@ void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) { __ fs()->call(Address::Absolute(entry_point_offset)); } +namespace detail { +// Mark which intrinsics we don't have handcrafted code for. 
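The `detail` block continuing below builds a constexpr table of unimplemented intrinsics purely out of macro expansions over template specializations. A minimal standalone sketch of the pattern (the enum and the two-entry list here are illustrative stand-ins, not the real intrinsics_list.h contents):

enum class Intrinsics { kNone, kMathRoundDouble, kStringEquals };  // illustrative subset

template <Intrinsics T>
struct IsUnimplemented {
  bool is_unimplemented = false;  // default: the intrinsic has handcrafted code
};

// One TRUE_OVERRIDE(MathRoundDouble) expansion specializes the template to flip the flag:
template <>
struct IsUnimplemented<Intrinsics::kMathRoundDouble> {
  bool is_unimplemented = true;
};

// The IS_UNIMPLEMENTED expansion then materializes one bool per intrinsic, indexable by the
// enum value; this is what gets handed to the CodeGenerator as an ArrayRef<const bool>:
static constexpr bool kIsIntrinsicUnimplemented[] = {
    false,                                                             // kNone
    IsUnimplemented<Intrinsics::kMathRoundDouble>().is_unimplemented,  // true
    IsUnimplemented<Intrinsics::kStringEquals>().is_unimplemented,     // false
};

Because only specializations flip the flag, adding an entry to the per-architecture UNIMPLEMENTED list is the only change needed to mark an intrinsic as lacking handcrafted code.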
+template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +#include "intrinsics_list.h" +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) \ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; +#undef INTRINSICS_LIST + +} // namespace detail + CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) @@ -1115,7 +1145,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, | (1 << kFakeReturnRegister), 0, compiler_options, - stats), + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), @@ -1197,9 +1228,21 @@ void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* inst new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction); codegen_->AddSlowPath(slow_path); + if (instruction->IsMethodExitHook()) { + // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it + // would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check + // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is + // disabled in debuggable runtime. The other bit is used when this method itself requires a + // deoptimization due to redefinition. So it is safe to just check for non-zero value here. + __ cmpl(Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + } + uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()); - int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); - __ cmpb(Address::Absolute(address + offset), Immediate(0)); + MemberOffset offset = instruction->IsMethodExitHook() ? + instrumentation::Instrumentation::HaveMethodExitListenersOffset() : + instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); + __ cmpb(Address::Absolute(address + offset.Int32Value()), Immediate(0)); __ j(kNotEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -1261,6 +1304,44 @@ void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) { void CodeGeneratorX86::GenerateFrameEntry() { __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address + + // Check if we need to generate the clinit check. We will jump to the + // resolution stub if the class is not initialized and the executing thread is + // not the thread initializing it. + // We do this before constructing the frame to get the correct stack trace if + // an exception is thrown. + if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) { + NearLabel continue_execution, resolution; + // We'll use EBP as temporary. + __ pushl(EBP); + // Check if we're visibly initialized. + + // We don't emit a read barrier here to save on code size. We rely on the + // resolution trampoline to do a suspend check before re-entering this code. 
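The frame-entry check being emitted here reduces to a three-way decision. A standalone sketch of that decision follows; the shifted threshold bytes are passed in as parameters rather than derived from the ART headers (they correspond to the constants computed in the removed GenerateClassInitializationCheck code further down in this diff):

#include <cstdint>

enum class Action { kRunMethod, kCallResolutionStub };

Action FrameEntryClinitCheck(uint8_t status_byte,
                             uint8_t shifted_visibly_initialized,
                             uint8_t shifted_initializing,
                             uint32_t clinit_thread_id,
                             uint32_t self_tid) {
  if (status_byte >= shifted_visibly_initialized) {
    return Action::kRunMethod;  // Class is visibly initialized: fall through to the method body.
  }
  if (status_byte < shifted_initializing) {
    return Action::kCallResolutionStub;  // Not even initializing: let the stub sort it out.
  }
  // Initializing: only the thread running the class initializer may execute the code already.
  return (clinit_thread_id == self_tid) ? Action::kRunMethod : Action::kCallResolutionStub;
}

Doing this before the frame is constructed keeps the stack trace correct if the resolution path ends up throwing.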
+ __ movl(EBP, Address(kMethodRegisterArgument, ArtMethod::DeclaringClassOffset().Int32Value())); + __ cmpb(Address(EBP, status_byte_offset), Immediate(shifted_visibly_initialized_value)); + __ j(kAboveEqual, &continue_execution); + + // Check if we're initializing and the thread initializing is the one + // executing the code. + __ cmpb(Address(EBP, status_byte_offset), Immediate(shifted_initializing_value)); + __ j(kBelow, &resolution); + + __ movl(EBP, Address(EBP, mirror::Class::ClinitThreadIdOffset().Int32Value())); + __ fs()->cmpl(EBP, Address::Absolute(Thread::TidOffset<kX86PointerSize>().Int32Value())); + __ j(kEqual, &continue_execution); + __ Bind(&resolution); + + __ popl(EBP); + // Jump to the resolution stub. + ThreadOffset32 entrypoint_offset = + GetThreadOffset<kX86PointerSize>(kQuickQuickResolutionTrampoline); + __ fs()->jmp(Address::Absolute(entrypoint_offset)); + + __ Bind(&continue_execution); + __ popl(EBP); + } + __ Bind(&frame_entry_label_); bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); @@ -1619,7 +1700,7 @@ void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type, __ movsd(dst.AsFpuRegister<XmmRegister>(), src); break; case DataType::Type::kReference: - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!gUseReadBarrier); __ movl(dst.AsRegister<Register>(), src); __ MaybeUnpoisonHeapReference(dst.AsRegister<Register>()); break; @@ -2230,12 +2311,12 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) { } } -void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetAllocator()) LocationSummary(info); +void LocationsBuilderX86::VisitNop(HNop* nop) { + new (GetGraph()->GetAllocator()) LocationSummary(nop); } -void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) { - // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. +void InstructionCodeGeneratorX86::VisitNop(HNop*) { + // The environment recording already happened in CodeGenerator::Compile. } void CodeGeneratorX86::IncreaseFrame(size_t adjustment) { @@ -2913,7 +2994,7 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { case DataType::Type::kInt64: { HInstruction* input = conversion->InputAt(0); Location input_location = input->IsConstant() - ? Location::ConstantLocation(input->AsConstant()) + ? Location::ConstantLocation(input) : Location::RegisterPairLocation(EAX, EDX); locations->SetInAt(0, input_location); // Make the output overlap to please the register allocator. This greatly simplifies @@ -5689,13 +5770,10 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke DCHECK_EQ(size, linker_patches->size()); } -void CodeGeneratorX86::MarkGCCard(Register temp, - Register card, - Register object, - Register value, - bool value_can_be_null) { +void CodeGeneratorX86::MarkGCCard( + Register temp, Register card, Register object, Register value, bool emit_null_check) { NearLabel is_null; - if (value_can_be_null) { + if (emit_null_check) { __ testl(value, value); __ j(kEqual, &is_null); } @@ -5720,7 +5798,7 @@ void CodeGeneratorX86::MarkGCCard(Register temp, // (no need to explicitly load `kCardDirty` as an immediate value). 
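The comment above describes the card-table trick; in plain C++ the marking amounts to the following sketch (the shift and dirty value are assumptions for illustration, the real constants live in gc::accounting::CardTable):

#include <cstdint>

constexpr size_t kCardShift = 10;     // assumed: one card byte per 2^10 bytes of heap
constexpr uint8_t kCardDirty = 0x70;  // assumed value; see CardTable for the real constant

// `biased_begin` is set up so that its least significant byte equals kCardDirty, which is why
// the emitted code can store the card-table base register itself instead of an immediate.
inline void MarkCard(uint8_t* biased_begin, uintptr_t object_address) {
  biased_begin[object_address >> kCardShift] = kCardDirty;
}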
__ movb(Address(temp, card, TIMES_1, 0), X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); - if (value_can_be_null) { + if (emit_null_check) { __ Bind(&is_null); } } @@ -5731,11 +5809,11 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI instruction->IsPredicatedInstanceFieldGet()); bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, - kEmitCompilerReadBarrier + gUseReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { @@ -5793,7 +5871,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, if (load_type == DataType::Type::kReference) { // /* HeapReference<Object> */ out = *(base + offset) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( @@ -5824,7 +5902,9 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, } } -void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) { +void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = @@ -5861,10 +5941,13 @@ void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldI locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too. - // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); + if (write_barrier_kind != WriteBarrierKind::kDontEmit) { + locations->AddTemp(Location::RequiresRegister()); + // Ensure the card is in a byte register. 
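Across the HandleFieldSet/VisitArraySet hunks here, temp allocation and the MarkGCCard call are now driven by the instruction's write-barrier kind; condensed as a sketch (the enum declaration is inferred from the three values used in these hunks):

enum class WriteBarrierKind { kEmitWithNullCheck, kEmitNoNullCheck, kDontEmit };

// kDontEmit call sites skip MarkGCCard entirely (a temp may still be needed for heap-reference
// poisoning); of the remaining kinds, only kEmitWithNullCheck keeps the null test around the
// card store, mirroring `value_can_be_null && kind == kEmitWithNullCheck` at the call sites.
inline bool ShouldEmitNullCheck(bool value_can_be_null, WriteBarrierKind kind) {
  return value_can_be_null && kind == WriteBarrierKind::kEmitWithNullCheck;
}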
+ locations->AddTemp(Location::RegisterLocation(ECX)); + } else if (kPoisonHeapReferences) { + locations->AddTemp(Location::RequiresRegister()); + } } } } @@ -5875,7 +5958,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, Address field_addr, Register base, bool is_volatile, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { LocationSummary* locations = instruction->GetLocations(); Location value = locations->InAt(value_index); bool needs_write_barrier = @@ -5988,10 +6072,15 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, codegen_->MaybeRecordImplicitNullCheck(instruction); } - if (needs_write_barrier) { + if (needs_write_barrier && write_barrier_kind != WriteBarrierKind::kDontEmit) { Register temp = locations->GetTemp(0).AsRegister<Register>(); Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null); + codegen_->MarkGCCard( + temp, + card, + base, + value.AsRegister<Register>(), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); } if (is_volatile) { @@ -6001,7 +6090,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = instruction->GetLocations(); @@ -6026,7 +6116,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, field_addr, base, is_volatile, - value_can_be_null); + value_can_be_null, + write_barrier_kind); if (is_predicated) { __ Bind(&pred_is_null); @@ -6042,19 +6133,25 @@ void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instructi } void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind()); } void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind()); } void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderX86::VisitPredicatedInstanceFieldGet( @@ -6202,7 +6299,7 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = 
new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_array_get_with_read_barrier @@ -6244,7 +6341,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( @@ -6315,10 +6412,12 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); } if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. - // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); + // Used by reference poisoning or emitting write barrier. + locations->AddTemp(Location::RequiresRegister()); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + // Only used when emitting a write barrier. Ensure the card is in a byte register. + locations->AddTemp(Location::RegisterLocation(ECX)); + } } } @@ -6435,9 +6534,16 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { } } - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard( - temp, card, array, value.AsRegister<Register>(), /* value_can_be_null= */ false); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen_->MarkGCCard(temp, + card, + array, + value.AsRegister<Register>(), + /* emit_null_check= */ false); + } if (can_value_be_null) { DCHECK(do_store.IsLinked()); @@ -7057,7 +7163,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { load_kind == HLoadClass::LoadKind::kBssEntryPublic || load_kind == HLoadClass::LoadKind::kBssEntryPackage); - const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; @@ -7071,7 +7177,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { } locations->SetOut(Location::RequiresRegister()); if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution and/or initialization to save everything. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -7109,7 +7215,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE bool generate_null_check = false; const ReadBarrierOption read_barrier_option = cls->IsInBootImage() ? 
kWithoutReadBarrier - : kCompilerReadBarrierOption; + : gCompilerReadBarrierOption; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { DCHECK(!cls->CanCallRuntime()); @@ -7233,12 +7339,6 @@ void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( SlowPathCode* slow_path, Register class_reg) { - constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); - const size_t status_byte_offset = - mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); - constexpr uint32_t shifted_visibly_initialized_value = - enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); - __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value)); __ j(kBelow, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -7296,7 +7396,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { } else { locations->SetOut(Location::RequiresRegister()); if (load_kind == HLoadString::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString to save everything. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -7345,7 +7445,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); // No need for memory fence, thanks to the x86 memory model. SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load); codegen_->AddSlowPath(slow_path); @@ -7365,7 +7465,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Label* fixup_label = codegen_->NewJitRootStringPatch( load->GetDexFile(), load->GetStringIndex(), load->GetString()); // /* GcRoot<mirror::String> */ out = *address - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); return; } default: @@ -7416,7 +7516,7 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { // Temp is used for read barrier. 
static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { - if (kEmitCompilerReadBarrier && + if (gUseReadBarrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -7466,9 +7566,9 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { } locations->SetInAt(0, Location::RequiresRegister()); if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::Any()); } @@ -7734,9 +7834,9 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { // a memory address. locations->SetInAt(1, Location::RequiresRegister()); } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::Any()); } @@ -8188,7 +8288,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister( ReadBarrierOption read_barrier_option) { Register out_reg = out.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -8222,7 +8322,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters( Register out_reg = out.AsRegister<Register>(); Register obj_reg = obj.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. 
// /* HeapReference<Object> */ out = *(obj + offset) @@ -8250,7 +8350,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad( ReadBarrierOption read_barrier_option) { Register root_reg = root.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: @@ -8314,7 +8414,7 @@ void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr Register obj, uint32_t offset, bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // /* HeapReference<Object> */ ref = *(obj + offset) @@ -8328,7 +8428,7 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr uint32_t data_offset, Location index, bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); static_assert( @@ -8347,7 +8447,7 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i bool needs_null_check, bool always_update_field, Register* temp) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // In slow path based read barriers, the read barrier call is @@ -8428,7 +8528,7 @@ void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the reference load. // @@ -8455,7 +8555,7 @@ void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Baker's read barriers shall be handled by the fast path // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier). DCHECK(!kUseBakerReadBarrier); @@ -8470,7 +8570,7 @@ void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction, void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the GC root load. // diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 75c5cebb5e..d27155f31d 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -19,6 +19,7 @@ #include "arch/x86/instruction_set_features_x86.h" #include "base/enums.h" +#include "base/macros.h" #include "code_generator.h" #include "dex/dex_file_types.h" #include "driver/compiler_options.h" @@ -26,7 +27,7 @@ #include "parallel_move_resolver.h" #include "utils/x86/assembler_x86.h" -namespace art { +namespace art HIDDEN { namespace x86 { // Use a local definition to prevent copying mistakes. 
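A recurring change in these code generators is the switch from the compile-time kEmitCompilerReadBarrier / kCompilerReadBarrierOption constants to runtime globals; the guards keep the same shape. A rough sketch of the idea (the declarations here are assumptions, not the real runtime headers):

// Chosen once when the runtime starts (previously a constexpr fixed at build time).
inline bool gUseReadBarrier = false;
// Whether the Baker variant is used remains a build-time property in this sketch.
constexpr bool kUseBakerReadBarrier = true;

inline bool ShouldEmitBakerFastPath() {
  // Mirrors the rewritten guards such as `if (gUseReadBarrier && kUseBakerReadBarrier)`.
  return gUseReadBarrier && kUseBakerReadBarrier;
}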
@@ -47,6 +48,61 @@ static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); +#define UNIMPLEMENTED_INTRINSIC_LIST_X86(V) \ + V(MathRoundDouble) \ + V(FloatIsInfinite) \ + V(DoubleIsInfinite) \ + V(IntegerHighestOneBit) \ + V(LongHighestOneBit) \ + V(LongDivideUnsigned) \ + V(CRC32Update) \ + V(CRC32UpdateBytes) \ + V(CRC32UpdateByteBuffer) \ + V(FP16ToFloat) \ + V(FP16ToHalf) \ + V(FP16Floor) \ + V(FP16Ceil) \ + V(FP16Rint) \ + V(FP16Greater) \ + V(FP16GreaterEquals) \ + V(FP16Less) \ + V(FP16LessEquals) \ + V(FP16Compare) \ + V(FP16Min) \ + V(FP16Max) \ + V(MathMultiplyHigh) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + /* 1.8 */ \ + V(UnsafeGetAndAddInt) \ + V(UnsafeGetAndAddLong) \ + V(UnsafeGetAndSetInt) \ + V(UnsafeGetAndSetLong) \ + V(UnsafeGetAndSetObject) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) \ + /* OpenJDK 11 */ \ + V(JdkUnsafeGetAndAddInt) \ + V(JdkUnsafeGetAndAddLong) \ + V(JdkUnsafeGetAndSetInt) \ + V(JdkUnsafeGetAndSetLong) \ + V(JdkUnsafeGetAndSetObject) + class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> { public: InvokeRuntimeCallingConvention() @@ -196,7 +252,9 @@ class LocationsBuilderX86 : public HGraphVisitor { void HandleInvoke(HInvoke* invoke); void HandleCondition(HCondition* condition); void HandleShift(HBinaryOperation* instruction); - void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); bool CpuHasAvxFeatureFlag(); bool CpuHasAvx2FeatureFlag(); @@ -249,7 +307,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { Address field_addr, Register base, bool is_volatile, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); private: // Generate code for the given suspend check. If not null, `successor` @@ -279,7 +338,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); // Generate a heap reference load using one register `out`: @@ -519,11 +579,8 @@ class CodeGeneratorX86 : public CodeGenerator { void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; // Emit a write barrier. 
- void MarkGCCard(Register temp, - Register card, - Register object, - Register value, - bool value_can_be_null); + void MarkGCCard( + Register temp, Register card, Register object, Register value, bool emit_null_check); void GenerateMemoryBarrier(MemBarrierKind kind); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 511917a735..eea6b204fa 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -21,7 +21,6 @@ #include "class_root-inl.h" #include "class_table.h" #include "code_generator_utils.h" -#include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "gc/space/image_space.h" @@ -37,6 +36,7 @@ #include "mirror/class-inl.h" #include "mirror/object_reference.h" #include "mirror/var_handle.h" +#include "optimizing/nodes.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/assembler.h" @@ -45,7 +45,7 @@ #include "utils/x86_64/constants_x86_64.h" #include "utils/x86_64/managed_register_x86_64.h" -namespace art { +namespace art HIDDEN { template<class MirrorType> class GcRoot; @@ -510,7 +510,7 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { : SlowPathCode(instruction), ref_(ref), unpoison_ref_before_marking_(unpoison_ref_before_marking) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; } @@ -601,7 +601,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode { unpoison_ref_before_marking_(unpoison_ref_before_marking), temp1_(temp1), temp2_(temp2) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } const char* GetDescription() const override { @@ -761,7 +761,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { obj_(obj), offset_(offset), index_(index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // If `obj` is equal to `out` or `ref`, it means the initial // object has been overwritten by (or after) the heap object // reference load to be instrumented, e.g.: @@ -937,7 +937,7 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { public: ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root) : SlowPathCode(instruction), out_(out), root_(root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { @@ -986,6 +986,10 @@ class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode { (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook; __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); + if (instruction_->IsMethodExitHook()) { + // Load FrameSize to pass to the exit hook. + __ movq(CpuRegister(R8), Immediate(codegen->GetFrameSize())); + } x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); @@ -1490,6 +1494,33 @@ void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) { __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true)); } +namespace detail { +// Mark which intrinsics we don't have handcrafted code for. 
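The GenerateMethodEntryExitHook changes in the x86-64 hunks around here replace the single NeedsEntryExitHooks byte with separate entry/exit listener checks and add a deopt-flag test for exit hooks; the emitted branches amount to the following sketch (the struct layout and helper are illustrative, not the runtime's Instrumentation class):

#include <cstdint>

struct InstrumentationFlags {  // sketch of the two bytes the emitted cmpb instructions test
  uint8_t have_method_entry_listeners;
  uint8_t have_method_exit_listeners;
};

inline bool NeedsHookSlowPath(const InstrumentationFlags& flags,
                              bool is_exit_hook,
                              int32_t should_deoptimize_flag) {
  // Exit hooks only: any non-zero ShouldDeoptimizeFlag in the frame takes the slow path; that
  // is cheaper than isolating the CheckCallerForDeopt bit.
  if (is_exit_hook && should_deoptimize_flag != 0) {
    return true;
  }
  uint8_t listeners = is_exit_hook ? flags.have_method_exit_listeners
                                   : flags.have_method_entry_listeners;
  return listeners != 0;
}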
+template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +#include "intrinsics_list.h" +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) \ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; +#undef INTRINSICS_LIST + +} // namespace detail + static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); @@ -1506,7 +1537,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), arraysize(kFpuCalleeSaves)), compiler_options, - stats), + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), @@ -1561,9 +1593,22 @@ void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* i new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction); codegen_->AddSlowPath(slow_path); + if (instruction->IsMethodExitHook()) { + // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it + // would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check + // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is + // disabled in debuggable runtime. The other bit is used when this method itself requires a + // deoptimization due to redefinition. So it is safe to just check for non-zero value here. + __ cmpl(Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()), + Immediate(0)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + } + uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()); - int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); - __ movq(CpuRegister(TMP), Immediate(address + offset)); + MemberOffset offset = instruction->IsMethodExitHook() ? + instrumentation::Instrumentation::HaveMethodExitListenersOffset() + : instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); + __ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value())); __ cmpb(Address(CpuRegister(TMP), 0), Immediate(0)); __ j(kNotEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -1653,6 +1698,44 @@ void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) { void CodeGeneratorX86_64::GenerateFrameEntry() { __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address + + // Check if we need to generate the clinit check. We will jump to the + // resolution stub if the class is not initialized and the executing thread is + // not the thread initializing it. + // We do this before constructing the frame to get the correct stack trace if + // an exception is thrown. + if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) { + NearLabel resolution; + // Check if we're visibly initialized. + + // We don't emit a read barrier here to save on code size. We rely on the + // resolution trampoline to do a suspend check before re-entering this code. 
+ __ movl(CpuRegister(TMP), + Address(CpuRegister(kMethodRegisterArgument), + ArtMethod::DeclaringClassOffset().Int32Value())); + __ cmpb(Address(CpuRegister(TMP), status_byte_offset), + Immediate(shifted_visibly_initialized_value)); + __ j(kAboveEqual, &frame_entry_label_); + + // Check if we're initializing and the thread initializing is the one + // executing the code. + __ cmpb(Address(CpuRegister(TMP), status_byte_offset), Immediate(shifted_initializing_value)); + __ j(kBelow, &resolution); + + __ movl(CpuRegister(TMP), + Address(CpuRegister(TMP), mirror::Class::ClinitThreadIdOffset().Int32Value())); + __ gs()->cmpl( + CpuRegister(TMP), + Address::Absolute(Thread::TidOffset<kX86_64PointerSize>().Int32Value(), /*no_rip=*/ true)); + __ j(kEqual, &frame_entry_label_); + __ Bind(&resolution); + + // Jump to the resolution stub. + ThreadOffset64 entrypoint_offset = + GetThreadOffset<kX86_64PointerSize>(kQuickQuickResolutionTrampoline); + __ gs()->jmp(Address::Absolute(entrypoint_offset, /*no_rip=*/ true)); + } + __ Bind(&frame_entry_label_); bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); @@ -2274,12 +2357,12 @@ void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) { } } -void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetAllocator()) LocationSummary(info); +void LocationsBuilderX86_64::VisitNop(HNop* nop) { + new (GetGraph()->GetAllocator()) LocationSummary(nop); } -void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) { - // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. +void InstructionCodeGeneratorX86_64::VisitNop(HNop*) { + // The environment recording already happened in CodeGenerator::Compile. } void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) { @@ -5013,7 +5096,7 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { instruction->IsPredicatedInstanceFieldGet()); bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, @@ -5064,7 +5147,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, if (load_type == DataType::Type::kReference) { // /* HeapReference<Object> */ out = *(base + offset) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( @@ -5119,6 +5202,9 @@ void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); } } + + // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of + // InstructionCodeGeneratorX86_64::HandleFieldSet. if (needs_write_barrier) { // Temporary registers for the write barrier. 
locations->AddTemp(Location::RequiresRegister()); @@ -5180,7 +5266,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, bool is_volatile, bool is_atomic, bool value_can_be_null, - bool byte_swap) { + bool byte_swap, + WriteBarrierKind write_barrier_kind) { LocationSummary* locations = instruction->GetLocations(); Location value = locations->InAt(value_index); @@ -5298,10 +5385,16 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, codegen_->MaybeRecordImplicitNullCheck(instruction); } - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index))) { + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index)) && + write_barrier_kind != WriteBarrierKind::kDontEmit) { CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>(); - codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null); + codegen_->MarkGCCard( + temp, + card, + base, + value.AsRegister<CpuRegister>(), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); } if (is_volatile) { @@ -5311,7 +5404,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = instruction->GetLocations(); @@ -5336,7 +5430,9 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, base, is_volatile, /*is_atomic=*/ false, - value_can_be_null); + value_can_be_null, + /*byte_swap=*/ false, + write_barrier_kind); if (is_predicated) { __ Bind(&pred_is_null); @@ -5348,7 +5444,10 @@ void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instructio } void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderX86_64::VisitPredicatedInstanceFieldGet( @@ -5388,7 +5487,10 @@ void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { } void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { @@ -5513,7 +5615,7 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_array_get_with_read_barrier @@ -5551,7 +5653,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { 
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( @@ -5619,9 +5721,12 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { } if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. + // Used by reference poisoning or emitting write barrier. locations->AddTemp(Location::RequiresRegister()); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + // Only used when emitting a write barrier. + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -5739,9 +5844,16 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { } } - CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); - codegen_->MarkGCCard( - temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); + codegen_->MarkGCCard(temp, + card, + array, + value.AsRegister<CpuRegister>(), + /* emit_null_check= */ false); + } if (can_value_be_null) { DCHECK(do_store.IsLinked()); @@ -5940,9 +6052,9 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object, CpuRegister value, - bool value_can_be_null) { + bool emit_null_check) { NearLabel is_null; - if (value_can_be_null) { + if (emit_null_check) { __ testl(value, value); __ j(kEqual, &is_null); } @@ -5967,7 +6079,7 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, // of the card to mark; and 2. to load the `kCardDirty` value) saves a load // (no need to explicitly load `kCardDirty` as an immediate value). 
__ movb(Address(temp, card, TIMES_1, 0), card); - if (value_can_be_null) { + if (emit_null_check) { __ Bind(&is_null); } } @@ -6282,12 +6394,6 @@ void ParallelMoveResolverX86_64::RestoreScratch(int reg) { void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( SlowPathCode* slow_path, CpuRegister class_reg) { - constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); - const size_t status_byte_offset = - mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); - constexpr uint32_t shifted_visibly_initialized_value = - enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); - __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value)); __ j(kBelow, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -6352,7 +6458,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { load_kind == HLoadClass::LoadKind::kBssEntryPublic || load_kind == HLoadClass::LoadKind::kBssEntryPackage); - const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; @@ -6366,7 +6472,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { } locations->SetOut(Location::RequiresRegister()); if (load_kind == HLoadClass::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution and/or initialization to save everything. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -6403,7 +6509,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S const ReadBarrierOption read_barrier_option = cls->IsInBootImage() ? kWithoutReadBarrier - : kCompilerReadBarrierOption; + : gCompilerReadBarrierOption; bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -6550,7 +6656,7 @@ void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { } else { locations->SetOut(Location::RequiresRegister()); if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString to save everything. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -6598,7 +6704,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA /* no_rip= */ false); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); // No need for memory fence, thanks to the x86-64 memory model. 
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load); codegen_->AddSlowPath(slow_path); @@ -6619,7 +6725,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA Label* fixup_label = codegen_->NewJitRootStringPatch( load->GetDexFile(), load->GetStringIndex(), load->GetString()); // /* GcRoot<mirror::String> */ out = *address - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); return; } default: @@ -6672,7 +6778,7 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { // Temp is used for read barrier. static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { - if (kEmitCompilerReadBarrier && + if (gUseReadBarrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -6722,9 +6828,9 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { } locations->SetInAt(0, Location::RequiresRegister()); if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::Any()); } @@ -7000,9 +7106,9 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { // a memory address. locations->SetInAt(1, Location::RequiresRegister()); } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::Any()); } @@ -7426,7 +7532,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister( ReadBarrierOption read_barrier_option) { CpuRegister out_reg = out.AsRegister<CpuRegister>(); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -7460,7 +7566,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters( CpuRegister out_reg = out.AsRegister<CpuRegister>(); CpuRegister obj_reg = obj.AsRegister<CpuRegister>(); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. 
// /* HeapReference<Object> */ out = *(obj + offset) @@ -7488,7 +7594,7 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad( ReadBarrierOption read_barrier_option) { CpuRegister root_reg = root.AsRegister<CpuRegister>(); if (read_barrier_option == kWithReadBarrier) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: @@ -7552,7 +7658,7 @@ void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in CpuRegister obj, uint32_t offset, bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // /* HeapReference<Object> */ ref = *(obj + offset) @@ -7566,7 +7672,7 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in uint32_t data_offset, Location index, bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); static_assert( @@ -7586,7 +7692,7 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction bool always_update_field, CpuRegister* temp1, CpuRegister* temp2) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // In slow path based read barriers, the read barrier call is @@ -7668,7 +7774,7 @@ void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the reference load. // @@ -7695,7 +7801,7 @@ void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction Location obj, uint32_t offset, Location index) { - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Baker's read barriers shall be handled by the fast path // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier). DCHECK(!kUseBakerReadBarrier); @@ -7710,7 +7816,7 @@ void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the GC root load. // diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 39a72d8211..dff2e799e0 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -18,13 +18,14 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ #include "arch/x86_64/instruction_set_features_x86_64.h" +#include "base/macros.h" #include "code_generator.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/x86_64/assembler_x86_64.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { // Use a local definition to prevent copying mistakes. @@ -52,6 +53,53 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength = // these are not clobbered by any direct call to native code (such as math intrinsics). 
static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 }; +#define UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V) \ + V(CRC32Update) \ + V(CRC32UpdateBytes) \ + V(CRC32UpdateByteBuffer) \ + V(FP16ToFloat) \ + V(FP16ToHalf) \ + V(FP16Floor) \ + V(FP16Ceil) \ + V(FP16Rint) \ + V(FP16Greater) \ + V(FP16GreaterEquals) \ + V(FP16Less) \ + V(FP16LessEquals) \ + V(FP16Compare) \ + V(FP16Min) \ + V(FP16Max) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + /* 1.8 */ \ + V(UnsafeGetAndAddInt) \ + V(UnsafeGetAndAddLong) \ + V(UnsafeGetAndSetInt) \ + V(UnsafeGetAndSetLong) \ + V(UnsafeGetAndSetObject) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) \ + /* OpenJDK 11 */ \ + V(JdkUnsafeGetAndAddInt) \ + V(JdkUnsafeGetAndAddLong) \ + V(JdkUnsafeGetAndSetInt) \ + V(JdkUnsafeGetAndSetLong) \ + V(JdkUnsafeGetAndSetObject) class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> { public: @@ -250,7 +298,8 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { bool is_volatile, bool is_atomic, bool value_can_be_null, - bool byte_swap = false); + bool byte_swap, + WriteBarrierKind write_barrier_kind); void Bswap(Location value, DataType::Type type, CpuRegister* temp = nullptr); @@ -273,7 +322,8 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); @@ -435,7 +485,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { CpuRegister card, CpuRegister object, CpuRegister value, - bool value_can_be_null); + bool emit_null_check); void GenerateMemoryBarrier(MemBarrierKind kind); diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc index 766bb01978..d759a16f48 100644 --- a/compiler/optimizing/code_sinking.cc +++ b/compiler/optimizing/code_sinking.cc @@ -19,30 +19,55 @@ #include "base/arena_bit_vector.h" #include "base/array_ref.h" #include "base/bit_vector-inl.h" +#include "base/globals.h" #include "base/logging.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "common_dominator.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { bool CodeSinking::Run() { - HBasicBlock* exit = graph_->GetExitBlock(); - if (exit == nullptr) { + if (graph_->GetExitBlock() == nullptr) { // Infinite loop, just bail. return false; } + + UncommonBranchSinking(); + ReturnSinking(); + return true; +} + +void CodeSinking::UncommonBranchSinking() { + HBasicBlock* exit = graph_->GetExitBlock(); + DCHECK(exit != nullptr); // TODO(ngeoffray): we do not profile branches yet, so use throw instructions // as an indicator of an uncommon branch. 
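The loop that follows now looks through a trailing TryBoundary before classifying an exit predecessor. A hypothetical helper condensing just that classification step (all calls are the ones used in the hunk; the real code also rewinds exit_predecessor itself):

// A TryBoundary sitting between the real last instruction (Throw/Return) and Exit is not a
// useful "uncommon branch" signal, so step over it, possibly into the single predecessor of a
// block that holds only the TryBoundary.
static HInstruction* GetLastRelevantInstruction(HBasicBlock* exit_predecessor) {
  HInstruction* last = exit_predecessor->GetLastInstruction();
  if (last->IsTryBoundary()) {
    if (last->GetPrevious() != nullptr) {
      return last->GetPrevious();
    }
    return exit_predecessor->GetSinglePredecessor()->GetLastInstruction();
  }
  return last;
}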
for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) { HInstruction* last = exit_predecessor->GetLastInstruction(); + + // TryBoundary instructions are sometimes inserted between the last instruction (e.g. Throw, + // Return) and Exit. We don't want to use that instruction for our "uncommon branch" heuristic + // because they are not as good an indicator as throwing branches, so we skip them and fetch the + // actual last instruction. + if (last->IsTryBoundary()) { + // We have an exit try boundary. Fetch the previous instruction. + DCHECK(!last->AsTryBoundary()->IsEntry()); + if (last->GetPrevious() == nullptr) { + DCHECK(exit_predecessor->IsSingleTryBoundary()); + exit_predecessor = exit_predecessor->GetSinglePredecessor(); + last = exit_predecessor->GetLastInstruction(); + } else { + last = last->GetPrevious(); + } + } + // Any predecessor of the exit that does not return, throws an exception. if (!last->IsReturn() && !last->IsReturnVoid()) { SinkCodeToUncommonBranch(exit_predecessor); } } - return true; } static bool IsInterestingInstruction(HInstruction* instruction) { @@ -88,7 +113,7 @@ static bool IsInterestingInstruction(HInstruction* instruction) { // We can only store on local allocations. Other heap references can // be escaping. Note that allocations can escape too, but we only move - // allocations if their users can move to, or are in the list of + // allocations if their users can move too, or are in the list of // post dominated blocks. if (instruction->IsInstanceFieldSet()) { if (!instruction->InputAt(0)->IsNewInstance()) { @@ -102,7 +127,7 @@ static bool IsInterestingInstruction(HInstruction* instruction) { } } - // Heap accesses cannot go pass instructions that have memory side effects, which + // Heap accesses cannot go past instructions that have memory side effects, which // we are not tracking here. Note that the load/store elimination optimization // runs before this optimization, and should have removed interesting ones. // In theory, we could handle loads of local allocations, but this is currently @@ -171,7 +196,6 @@ static bool ShouldFilterUse(HInstruction* instruction, return false; } - // Find the ideal position for moving `instruction`. If `filter` is true, // we filter out store instructions to that instruction, which are processed // first in the step (3) of the sinking algorithm. @@ -210,56 +234,52 @@ static HInstruction* FindIdealPosition(HInstruction* instruction, return nullptr; } - // Move to the first dominator not in a loop, if we can. - while (target_block->IsInLoop()) { + // Move to the first dominator not in a loop, if we can. We only do this if we are trying to hoist + // `instruction` out of a loop it wasn't a part of. + const HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); + while (target_block->IsInLoop() && target_block->GetLoopInformation() != loop_info) { if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) { break; } target_block = target_block->GetDominator(); DCHECK(target_block != nullptr); } - const bool was_in_loop = target_block->IsInLoop(); - - // For throwing instructions we can move them into: - // * Blocks that are not part of a try - // * Catch blocks are suitable as well, as long as they are not part of an outer try. - // * Blocks that are part of the same try that the instrucion was already in. 
- // - // We cannot move an instruction that can throw into a try that said instruction is not a part of - // already, as that would mean it will throw into a different catch block. If we detect that - // `target_block` is not a valid block to move `instruction` to, we traverse up the dominator tree - // to find if we have a suitable block. - while (instruction->CanThrow() && target_block->GetTryCatchInformation() != nullptr) { - if (target_block->IsCatchBlock()) { - // If the catch block has an xhandler, it means it is inside of an outer try. - const bool inside_of_another_try_catch = target_block->GetSuccessors().size() != 1; - if (!inside_of_another_try_catch) { - // If we have a catch block, it's okay to sink as long as that catch is not inside of - // another try catch. - break; + + if (instruction->CanThrow()) { + // Consistency check: We shouldn't land in a loop if we weren't in one before traversing up the + // dominator tree regarding try catches. + const bool was_in_loop = target_block->IsInLoop(); + + // We cannot move an instruction that can throw into a try that said instruction is not a part + // of already, as that would mean it will throw into a different catch block. In short, for + // throwing instructions: + // * If the throwing instruction is part of a try, they should only be sunk into that same try. + // * If the throwing instruction is not part of any try, they shouldn't be sunk to any try. + if (instruction->GetBlock()->IsTryBlock()) { + const HTryBoundary& try_entry = + instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry(); + while (!(target_block->IsTryBlock() && + try_entry.HasSameExceptionHandlersAs( + target_block->GetTryCatchInformation()->GetTryEntry()))) { + target_block = target_block->GetDominator(); + if (!post_dominated.IsBitSet(target_block->GetBlockId())) { + // We couldn't find a suitable block. + return nullptr; + } } } else { - DCHECK(target_block->IsTryBlock()); - if (instruction->GetBlock()->IsTryBlock() && - instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry().GetId() == - target_block->GetTryCatchInformation()->GetTryEntry().GetId()) { - // Sink within the same try block is allowed. - break; + // Search for the first block also not in a try block + while (target_block->IsTryBlock()) { + target_block = target_block->GetDominator(); + if (!post_dominated.IsBitSet(target_block->GetBlockId())) { + // We couldn't find a suitable block. + return nullptr; + } } } - // We are now in the case where we would be moving to a different try. Since we don't want - // that, traverse up the dominator tree to find a suitable block. - if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) { - // We couldn't find a suitable block. - return nullptr; - } - target_block = target_block->GetDominator(); - DCHECK(target_block != nullptr); - } - // We shouldn't land in a loop if we weren't in one before traversing up the dominator tree - // regarding try catches. - DCHECK_IMPLIES(target_block->IsInLoop(), was_in_loop); + DCHECK_IMPLIES(target_block->IsInLoop(), was_in_loop); + } // Find insertion position. No need to filter anymore, as we have found a // target block. 
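To illustrate the try/catch restriction described above, here is a hypothetical source-level sketch (MayThrow and Caller are invented names; the real pass reasons about HTryBoundary blocks in the HIR): a throwing instruction that lives inside a try may only be sunk to blocks of that same try, otherwise the exception would reach a different handler, and an instruction outside any try must not be sunk into one.

#include <stdexcept>

int MayThrow(bool fail) {
  if (fail) throw std::runtime_error("boom");
  return 42;
}

int Caller(bool fail, bool uncommon) {
  int v = 0;
  try {
    v = MayThrow(fail);   // inside this try: may only be sunk to blocks of this same try,
                          // otherwise the exception would reach a different handler
    if (uncommon) {
      return v + 1;
    }
  } catch (const std::exception&) {
    return -1;
  }
  return v;
}
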
@@ -271,10 +291,21 @@ static HInstruction* FindIdealPosition(HInstruction* instruction, } } for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) { - HInstruction* user = use.GetUser()->GetHolder(); + HEnvironment* env = use.GetUser(); + HInstruction* user = env->GetHolder(); if (user->GetBlock() == target_block && (insert_pos == nullptr || user->StrictlyDominates(insert_pos))) { - insert_pos = user; + if (target_block->IsCatchBlock() && target_block->GetFirstInstruction() == user) { + // We can sink the instructions past the environment setting Nop. If we do that, we have to + // remove said instruction from the environment. Since we know that we will be sinking the + // instruction to this block and there are no more instructions to consider, we can safely + // remove it from the environment now. + DCHECK(target_block->GetFirstInstruction()->IsNop()); + env->RemoveAsUserOfInput(use.GetIndex()); + env->SetRawEnvAt(use.GetIndex(), /*instruction=*/ nullptr); + } else { + insert_pos = user; + } } } if (insert_pos == nullptr) { @@ -310,8 +341,8 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { ScopedArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc)); // Step (1): Visit post order to get a subset of blocks post dominated by `end_block`. - // TODO(ngeoffray): Getting the full set of post-dominated shoud be done by - // computint the post dominator tree, but that could be too time consuming. Also, + // TODO(ngeoffray): Getting the full set of post-dominated should be done by + // computing the post dominator tree, but that could be too time consuming. Also, // we should start the analysis from blocks dominated by an uncommon branch, but we // don't profile branches yet. bool found_block = false; @@ -321,45 +352,43 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { post_dominated.SetBit(block->GetBlockId()); } else if (found_block) { bool is_post_dominated = true; - if (block->GetSuccessors().empty()) { - // We currently bail for loops. - is_post_dominated = false; - } else { - // BasicBlock that are try entries look like this: - // BasicBlock i: - // instr 1 - // ... - // instr N - // TryBoundary kind:entry ---Try begins here--- - // - // Due to how our BasicBlocks are structured, BasicBlock i will have an xhandler successor - // since we are starting a try. If we use `GetSuccessors` for this case, we will check if - // the catch block is post_dominated. - // - // However, this catch block doesn't matter: when we sink the instruction into that - // BasicBlock i, we do it before the TryBoundary (i.e. outside of the try and outside the - // catch's domain). We can ignore catch blocks using `GetNormalSuccessors` to sink code - // right before the start of a try block. - // - // On the other side of the coin, BasicBlock that are try exits look like this: - // BasicBlock j: - // instr 1 - // ... - // instr N - // TryBoundary kind:exit ---Try ends here--- - // - // If we sink to these basic blocks we would be sinking inside of the try so we would like - // to check the catch block for post dominance. - const bool ends_with_try_boundary_entry = - block->EndsWithTryBoundary() && block->GetLastInstruction()->AsTryBoundary()->IsEntry(); - ArrayRef<HBasicBlock* const> successors = - ends_with_try_boundary_entry ? 
block->GetNormalSuccessors() : - ArrayRef<HBasicBlock* const>(block->GetSuccessors()); - for (HBasicBlock* successor : successors) { - if (!post_dominated.IsBitSet(successor->GetBlockId())) { - is_post_dominated = false; - break; - } + DCHECK_NE(block, graph_->GetExitBlock()) + << "We shouldn't encounter the exit block after `end_block`."; + + // BasicBlock that are try entries look like this: + // BasicBlock i: + // instr 1 + // ... + // instr N + // TryBoundary kind:entry ---Try begins here--- + // + // Due to how our BasicBlocks are structured, BasicBlock i will have an xhandler successor + // since we are starting a try. If we use `GetSuccessors` for this case, we will check if + // the catch block is post_dominated. + // + // However, this catch block doesn't matter: when we sink the instruction into that + // BasicBlock i, we do it before the TryBoundary (i.e. outside of the try and outside the + // catch's domain). We can ignore catch blocks using `GetNormalSuccessors` to sink code + // right before the start of a try block. + // + // On the other side of the coin, BasicBlock that are try exits look like this: + // BasicBlock j: + // instr 1 + // ... + // instr N + // TryBoundary kind:exit ---Try ends here--- + // + // If we sink to these basic blocks we would be sinking inside of the try so we would like + // to check the catch block for post dominance. + const bool ends_with_try_boundary_entry = + block->EndsWithTryBoundary() && block->GetLastInstruction()->AsTryBoundary()->IsEntry(); + ArrayRef<HBasicBlock* const> successors = + ends_with_try_boundary_entry ? block->GetNormalSuccessors() : + ArrayRef<HBasicBlock* const>(block->GetSuccessors()); + for (HBasicBlock* successor : successors) { + if (!post_dominated.IsBitSet(successor->GetBlockId())) { + is_post_dominated = false; + break; } } if (is_post_dominated) { @@ -509,4 +538,79 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { } } +void CodeSinking::ReturnSinking() { + HBasicBlock* exit = graph_->GetExitBlock(); + DCHECK(exit != nullptr); + + int number_of_returns = 0; + bool saw_return = false; + for (HBasicBlock* pred : exit->GetPredecessors()) { + // TODO(solanes): We might have Return/ReturnVoid->TryBoundary->Exit. We can theoretically + // handle them and move them out of the TryBoundary. However, it is a border case and it adds + // codebase complexity. + if (pred->GetLastInstruction()->IsReturn() || pred->GetLastInstruction()->IsReturnVoid()) { + saw_return |= pred->GetLastInstruction()->IsReturn(); + ++number_of_returns; + } + } + + if (number_of_returns < 2) { + // Nothing to do. + return; + } + + // `new_block` will coalesce the Return instructions into Phi+Return, or the ReturnVoid + // instructions into a ReturnVoid. + HBasicBlock* new_block = new (graph_->GetAllocator()) HBasicBlock(graph_, exit->GetDexPc()); + if (saw_return) { + HPhi* new_phi = nullptr; + for (size_t i = 0; i < exit->GetPredecessors().size(); /*++i in loop*/) { + HBasicBlock* pred = exit->GetPredecessors()[i]; + if (!pred->GetLastInstruction()->IsReturn()) { + ++i; + continue; + } + + HReturn* ret = pred->GetLastInstruction()->AsReturn(); + if (new_phi == nullptr) { + // Create the new_phi, if we haven't done so yet. We do it here since we need to know the + // type to assign to it. 
+ new_phi = new (graph_->GetAllocator()) HPhi(graph_->GetAllocator(), + kNoRegNumber, + /*number_of_inputs=*/0, + ret->InputAt(0)->GetType()); + new_block->AddPhi(new_phi); + } + new_phi->AddInput(ret->InputAt(0)); + pred->ReplaceAndRemoveInstructionWith(ret, + new (graph_->GetAllocator()) HGoto(ret->GetDexPc())); + pred->ReplaceSuccessor(exit, new_block); + // Since we are removing a predecessor, there's no need to increment `i`. + } + new_block->AddInstruction(new (graph_->GetAllocator()) HReturn(new_phi, exit->GetDexPc())); + } else { + for (size_t i = 0; i < exit->GetPredecessors().size(); /*++i in loop*/) { + HBasicBlock* pred = exit->GetPredecessors()[i]; + if (!pred->GetLastInstruction()->IsReturnVoid()) { + ++i; + continue; + } + + HReturnVoid* ret = pred->GetLastInstruction()->AsReturnVoid(); + pred->ReplaceAndRemoveInstructionWith(ret, + new (graph_->GetAllocator()) HGoto(ret->GetDexPc())); + pred->ReplaceSuccessor(exit, new_block); + // Since we are removing a predecessor, there's no need to increment `i`. + } + new_block->AddInstruction(new (graph_->GetAllocator()) HReturnVoid(exit->GetDexPc())); + } + + new_block->AddSuccessor(exit); + graph_->AddBlock(new_block); + + // Recompute dominance since we added a new block. + graph_->ClearDominanceInformation(); + graph_->ComputeDominanceInformation(); +} + } // namespace art diff --git a/compiler/optimizing/code_sinking.h b/compiler/optimizing/code_sinking.h index 8eb3a520c3..c743db40d9 100644 --- a/compiler/optimizing/code_sinking.h +++ b/compiler/optimizing/code_sinking.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_SINKING_H_ #define ART_COMPILER_OPTIMIZING_CODE_SINKING_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { /** * Optimization pass to move instructions into uncommon branches, @@ -38,10 +39,16 @@ class CodeSinking : public HOptimization { static constexpr const char* kCodeSinkingPassName = "code_sinking"; private: - // Try to move code only used by `end_block` and all its post-dominated / dominated + // Tries to sink code to uncommon branches. + void UncommonBranchSinking(); + // Tries to move code only used by `end_block` and all its post-dominated / dominated // blocks, to these blocks. void SinkCodeToUncommonBranch(HBasicBlock* end_block); + // Coalesces the Return/ReturnVoid instructions into one, if we have two or more. We do this to + // avoid generating the exit frame code several times. + void ReturnSinking(); + DISALLOW_COPY_AND_ASSIGN(CodeSinking); }; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index c0441b07ed..2d9acc49b3 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -33,7 +33,7 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { // Return all combinations of ISA and code generator that are executable on // hardware, or on simulator, and that we'd like to test. 
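Relating back to the ReturnSinking pass added in code_sinking.cc above, the following hypothetical sketch (SignBefore/SignAfter are invented names) shows the source-level shape of the transformation: several returns are coalesced into a single merge point and one return, so the exit frame code is emitted only once.

// Before: three HReturn instructions, each of which would emit its own exit sequence.
int SignBefore(int x) {
  if (x > 0) return 1;
  if (x < 0) return -1;
  return 0;
}

// After: the returned values feed a single merge point (an HPhi in the IR) and one HReturn.
int SignAfter(int x) {
  int result;
  if (x > 0) {
    result = 1;
  } else if (x < 0) {
    result = -1;
  } else {
    result = 0;
  }
  return result;  // single Return, single exit sequence
}
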
@@ -64,7 +64,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { return v; } -class CodegenTest : public OptimizingUnitTest { +class CodegenTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void TestCode(const std::vector<uint16_t>& data, bool has_result = false, int32_t expected = 0); void TestCodeLong(const std::vector<uint16_t>& data, bool has_result, int64_t expected); diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index 397e601cee..7af9d0f44c 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -20,6 +20,7 @@ #include "arch/arm/registers_arm.h" #include "arch/instruction_set.h" #include "arch/x86/registers_x86.h" +#include "base/macros.h" #include "code_simulator.h" #include "code_simulator_container.h" #include "common_compiler_test.h" @@ -35,6 +36,10 @@ #include "code_generator_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 +#include "code_generator_riscv64.h" +#endif + #ifdef ART_ENABLE_CODEGEN_x86 #include "code_generator_x86.h" #endif @@ -43,9 +48,9 @@ #include "code_generator_x86_64.h" #endif -namespace art { +namespace art HIDDEN { -typedef CodeGenerator* (*CreateCodegenFn)(HGraph*, const CompilerOptions&); +using CreateCodegenFn = CodeGenerator* (*)(HGraph*, const CompilerOptions&); class CodegenTargetConfig { public: @@ -254,15 +259,11 @@ static void Run(const InternalCodeAllocator& allocator, Runtime* GetRuntime() override { return nullptr; } }; CodeHolder code_holder; - const void* code_ptr = + const void* method_code = code_holder.MakeExecutable(allocator.GetMemory(), ArrayRef<const uint8_t>(), target_isa); - typedef Expected (*fptr)(); - fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(code_ptr)); - if (target_isa == InstructionSet::kThumb2) { - // For thumb we need the bottom bit set. 
- f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1); - } + using fptr = Expected (*)(); + fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(method_code)); VerifyGeneratedCode(target_isa, f, has_result, expected); } @@ -332,6 +333,10 @@ inline CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& } #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 +inline CodeGenerator* create_codegen_riscv64(HGraph*, const CompilerOptions&) { return nullptr; } +#endif + #ifdef ART_ENABLE_CODEGEN_x86 inline CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) { return new (graph->GetAllocator()) TestCodeGeneratorX86(graph, compiler_options); diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index 320915ee57..5f71cb906c 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ +#include "base/macros.h" #include "instruction_simplifier_shared.h" #include "locations.h" #include "nodes.h" @@ -28,7 +29,7 @@ #include "aarch32/macro-assembler-aarch32.h" #pragma GCC diagnostic pop -namespace art { +namespace art HIDDEN { using helpers::HasShifterOperand; diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index 81c6561318..20b0e38af5 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ +#include "base/macros.h" #include "code_generator.h" #include "instruction_simplifier_shared.h" #include "locations.h" @@ -31,7 +32,7 @@ #include "aarch64/simulator-aarch64.h" #pragma GCC diagnostic pop -namespace art { +namespace art HIDDEN { using helpers::CanFitInShifterOperand; using helpers::HasShifterOperand; @@ -153,7 +154,7 @@ inline vixl::aarch64::CPURegister InputCPURegisterOrZeroRegAt(HInstruction* inst int index) { HInstruction* input = instr->InputAt(index); DataType::Type input_type = input->GetType(); - if (input->IsConstant() && input->AsConstant()->IsZeroBitPattern()) { + if (IsZeroBitPattern(input)) { return (DataType::Size(input_type) >= vixl::aarch64::kXRegSizeInBytes) ? 
vixl::aarch64::Register(vixl::aarch64::xzr) : vixl::aarch64::Register(vixl::aarch64::wzr); @@ -314,7 +315,7 @@ inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { if (constant->IsConstant() && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { - return Location::ConstantLocation(constant->AsConstant()); + return Location::ConstantLocation(constant); } return Location::RequiresRegister(); @@ -380,10 +381,6 @@ inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { return instruction->IsAdd() || instruction->IsSub(); } -inline bool IsConstantZeroBitPattern(const HInstruction* instruction) { - return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern(); -} - } // namespace helpers } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h index 9f012cfbb2..f01270ee4a 100644 --- a/compiler/optimizing/common_dominator.h +++ b/compiler/optimizing/common_dominator.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_ #define ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_ +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { // Helper class for finding common dominators of two or more blocks in a graph. // The domination information of a graph must not be modified while there is diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index 2031707759..06d19e3f29 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -16,14 +16,20 @@ #include "constant_folding.h" -namespace art { +#include <algorithm> + +#include "dex/dex_file-inl.h" +#include "optimizing/data_type.h" +#include "optimizing/nodes.h" + +namespace art HIDDEN { // This visitor tries to simplify instructions that can be evaluated // as constants. -class HConstantFoldingVisitor : public HGraphDelegateVisitor { +class HConstantFoldingVisitor final : public HGraphDelegateVisitor { public: - explicit HConstantFoldingVisitor(HGraph* graph) - : HGraphDelegateVisitor(graph) {} + HConstantFoldingVisitor(HGraph* graph, OptimizingCompilerStats* stats, bool use_all_optimizations) + : HGraphDelegateVisitor(graph, stats), use_all_optimizations_(use_all_optimizations) {} private: void VisitBasicBlock(HBasicBlock* block) override; @@ -31,8 +37,15 @@ class HConstantFoldingVisitor : public HGraphDelegateVisitor { void VisitUnaryOperation(HUnaryOperation* inst) override; void VisitBinaryOperation(HBinaryOperation* inst) override; - void VisitTypeConversion(HTypeConversion* inst) override; + void VisitArrayLength(HArrayLength* inst) override; void VisitDivZeroCheck(HDivZeroCheck* inst) override; + void VisitIf(HIf* inst) override; + void VisitTypeConversion(HTypeConversion* inst) override; + + void PropagateValue(HBasicBlock* starting_block, HInstruction* variable, HConstant* constant); + + // Use all optimizations without restrictions. 
+ bool use_all_optimizations_; DISALLOW_COPY_AND_ASSIGN(HConstantFoldingVisitor); }; @@ -55,6 +68,11 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor { void VisitBelow(HBelow* instruction) override; void VisitBelowOrEqual(HBelowOrEqual* instruction) override; + void VisitGreaterThan(HGreaterThan* instruction) override; + void VisitGreaterThanOrEqual(HGreaterThanOrEqual* instruction) override; + void VisitLessThan(HLessThan* instruction) override; + void VisitLessThanOrEqual(HLessThanOrEqual* instruction) override; + void VisitAnd(HAnd* instruction) override; void VisitCompare(HCompare* instruction) override; void VisitMul(HMul* instruction) override; @@ -69,7 +87,7 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor { bool HConstantFolding::Run() { - HConstantFoldingVisitor visitor(graph_); + HConstantFoldingVisitor visitor(graph_, stats_, use_all_optimizations_); // Process basic blocks in reverse post-order in the dominator tree, // so that an instruction turned into a constant, used as input of // another instruction, may possibly be used to turn that second @@ -111,16 +129,6 @@ void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) { } } -void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) { - // Constant folding: replace `TypeConversion(a)' with a constant at - // compile time if `a' is a constant. - HConstant* constant = inst->TryStaticEvaluation(); - if (constant != nullptr) { - inst->ReplaceWith(constant); - inst->GetBlock()->RemoveInstruction(inst); - } -} - void HConstantFoldingVisitor::VisitDivZeroCheck(HDivZeroCheck* inst) { // We can safely remove the check if the input is a non-null constant. HInstruction* check_input = inst->InputAt(0); @@ -130,6 +138,169 @@ void HConstantFoldingVisitor::VisitDivZeroCheck(HDivZeroCheck* inst) { } } +void HConstantFoldingVisitor::PropagateValue(HBasicBlock* starting_block, + HInstruction* variable, + HConstant* constant) { + const bool recording_stats = stats_ != nullptr; + size_t uses_before = 0; + size_t uses_after = 0; + if (recording_stats) { + uses_before = variable->GetUses().SizeSlow(); + } + + if (variable->GetUses().HasExactlyOneElement()) { + // Nothing to do, since we only have the `if (variable)` use or the `condition` use. + return; + } + + variable->ReplaceUsesDominatedBy( + starting_block->GetFirstInstruction(), constant, /* strictly_dominated= */ false); + + if (recording_stats) { + uses_after = variable->GetUses().SizeSlow(); + DCHECK_GE(uses_after, 1u) << "we must at least have the use in the if clause."; + DCHECK_GE(uses_before, uses_after); + MaybeRecordStat(stats_, MethodCompilationStat::kPropagatedIfValue, uses_before - uses_after); + } +} + +void HConstantFoldingVisitor::VisitIf(HIf* inst) { + // This optimization can take a lot of compile time since we have a lot of If instructions in + // graphs. + if (!use_all_optimizations_) { + return; + } + + // Consistency check: the true and false successors do not dominate each other. + DCHECK(!inst->IfTrueSuccessor()->Dominates(inst->IfFalseSuccessor()) && + !inst->IfFalseSuccessor()->Dominates(inst->IfTrueSuccessor())); + + HInstruction* if_input = inst->InputAt(0); + + // Already a constant. 
+ if (if_input->IsConstant()) { + return; + } + + // if (variable) { + // SSA `variable` guaranteed to be true + // } else { + // and here false + // } + PropagateValue(inst->IfTrueSuccessor(), if_input, GetGraph()->GetIntConstant(1)); + PropagateValue(inst->IfFalseSuccessor(), if_input, GetGraph()->GetIntConstant(0)); + + // If the input is a condition, we can propagate the information of the condition itself. + if (!if_input->IsCondition()) { + return; + } + HCondition* condition = if_input->AsCondition(); + + // We want either `==` or `!=`, since we cannot make assumptions for other conditions e.g. `>` + if (!condition->IsEqual() && !condition->IsNotEqual()) { + return; + } + + HInstruction* left = condition->GetLeft(); + HInstruction* right = condition->GetRight(); + + // We want one of them to be a constant and not the other. + if (left->IsConstant() == right->IsConstant()) { + return; + } + + // At this point we have something like: + // if (variable == constant) { + // SSA `variable` guaranteed to be equal to constant here + // } else { + // No guarantees can be made here (usually, see boolean case below). + // } + // Similarly with variable != constant, except that we can make guarantees in the else case. + + HConstant* constant = left->IsConstant() ? left->AsConstant() : right->AsConstant(); + HInstruction* variable = left->IsConstant() ? right : left; + + // Don't deal with floats/doubles since they bring a lot of edge cases e.g. + // if (f == 0.0f) { + // // f is not really guaranteed to be 0.0f. It could be -0.0f, for example + // } + if (DataType::IsFloatingPointType(variable->GetType())) { + return; + } + DCHECK(!DataType::IsFloatingPointType(constant->GetType())); + + // Sometimes we have an HCompare flowing into an Equals/NonEquals, which can act as a proxy. For + // example: `Equals(Compare(var, constant), 0)`. This is common for long, float, and double. + if (variable->IsCompare()) { + // We only care about equality comparisons so we skip if it is a less or greater comparison. + if (!constant->IsArithmeticZero()) { + return; + } + + // Update left and right to be the ones from the HCompare. + left = variable->AsCompare()->GetLeft(); + right = variable->AsCompare()->GetRight(); + + // Re-check that one of them to be a constant and not the other. + if (left->IsConstant() == right->IsConstant()) { + return; + } + + constant = left->IsConstant() ? left->AsConstant() : right->AsConstant(); + variable = left->IsConstant() ? right : left; + + // Re-check floating point values. + if (DataType::IsFloatingPointType(variable->GetType())) { + return; + } + DCHECK(!DataType::IsFloatingPointType(constant->GetType())); + } + + // From this block forward we want to replace the SSA value. We use `starting_block` and not the + // `if` block as we want to update one of the branches but not the other. + HBasicBlock* starting_block = + condition->IsEqual() ? inst->IfTrueSuccessor() : inst->IfFalseSuccessor(); + + PropagateValue(starting_block, variable, constant); + + // Special case for booleans since they have only two values so we know what to propagate in the + // other branch. However, sometimes our boolean values are not compared to 0 or 1. In those cases + // we cannot make an assumption for the `else` branch. + if (variable->GetType() == DataType::Type::kBool && + constant->IsIntConstant() && + (constant->AsIntConstant()->IsTrue() || constant->AsIntConstant()->IsFalse())) { + HBasicBlock* other_starting_block = + condition->IsEqual() ? 
inst->IfFalseSuccessor() : inst->IfTrueSuccessor(); + DCHECK_NE(other_starting_block, starting_block); + + HConstant* other_constant = constant->AsIntConstant()->IsTrue() ? + GetGraph()->GetIntConstant(0) : + GetGraph()->GetIntConstant(1); + DCHECK_NE(other_constant, constant); + PropagateValue(other_starting_block, variable, other_constant); + } +} + +void HConstantFoldingVisitor::VisitArrayLength(HArrayLength* inst) { + HInstruction* input = inst->InputAt(0); + if (input->IsLoadString()) { + DCHECK(inst->IsStringLength()); + HLoadString* load_string = input->AsLoadString(); + const DexFile& dex_file = load_string->GetDexFile(); + const dex::StringId& string_id = dex_file.GetStringId(load_string->GetStringIndex()); + inst->ReplaceWith(GetGraph()->GetIntConstant(dex_file.GetStringLength(string_id))); + } +} + +void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) { + // Constant folding: replace `TypeConversion(a)' with a constant at + // compile time if `a' is a constant. + HConstant* constant = inst->TryStaticEvaluation(); + if (constant != nullptr) { + inst->ReplaceWith(constant); + inst->GetBlock()->RemoveInstruction(inst); + } +} void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instruction) { DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()); @@ -145,8 +316,17 @@ void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instr } void InstructionWithAbsorbingInputSimplifier::VisitEqual(HEqual* instruction) { - if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) || - (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) { + if (instruction->GetLeft() == instruction->GetRight() && + !DataType::IsFloatingPointType(instruction->GetLeft()->GetType())) { + // Replace code looking like + // EQUAL lhs, lhs + // CONSTANT true + // We don't perform this optimizations for FP types since Double.NaN != Double.NaN, which is the + // opposite value. + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1)); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) || + (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) { // Replace code looking like // EQUAL lhs, null // where lhs cannot be null with @@ -157,8 +337,17 @@ void InstructionWithAbsorbingInputSimplifier::VisitEqual(HEqual* instruction) { } void InstructionWithAbsorbingInputSimplifier::VisitNotEqual(HNotEqual* instruction) { - if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) || - (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) { + if (instruction->GetLeft() == instruction->GetRight() && + !DataType::IsFloatingPointType(instruction->GetLeft()->GetType())) { + // Replace code looking like + // NOT_EQUAL lhs, lhs + // CONSTANT false + // We don't perform this optimizations for FP types since Double.NaN != Double.NaN, which is the + // opposite value. 
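As a standalone illustration of the NaN remark above (not part of the patch), this small program shows why EQUAL(lhs, lhs) cannot be folded to true for floating-point types:

#include <cmath>
#include <iostream>

int main() {
  double x = std::nan("");
  // Prints "0 1": x == x is false and x != x is true when x is NaN, so the
  // identity-comparison folding must skip floating-point operands.
  std::cout << (x == x) << " " << (x != x) << "\n";
  return 0;
}
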
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0)); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) || + (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) { // Replace code looking like // NOT_EQUAL lhs, null // where lhs cannot be null with @@ -169,8 +358,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitNotEqual(HNotEqual* instructi } void InstructionWithAbsorbingInputSimplifier::VisitAbove(HAbove* instruction) { - if (instruction->GetLeft()->IsConstant() && - instruction->GetLeft()->AsConstant()->IsArithmeticZero()) { + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // ABOVE lhs, lhs + // CONSTANT false + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0)); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (instruction->GetLeft()->IsConstant() && + instruction->GetLeft()->AsConstant()->IsArithmeticZero()) { // Replace code looking like // ABOVE dst, 0, src // unsigned 0 > src is always false // with @@ -181,8 +376,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitAbove(HAbove* instruction) { } void InstructionWithAbsorbingInputSimplifier::VisitAboveOrEqual(HAboveOrEqual* instruction) { - if (instruction->GetRight()->IsConstant() && - instruction->GetRight()->AsConstant()->IsArithmeticZero()) { + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // ABOVE_OR_EQUAL lhs, lhs + // CONSTANT true + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1)); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (instruction->GetRight()->IsConstant() && + instruction->GetRight()->AsConstant()->IsArithmeticZero()) { // Replace code looking like // ABOVE_OR_EQUAL dst, src, 0 // unsigned src >= 0 is always true // with @@ -193,8 +394,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitAboveOrEqual(HAboveOrEqual* i } void InstructionWithAbsorbingInputSimplifier::VisitBelow(HBelow* instruction) { - if (instruction->GetRight()->IsConstant() && - instruction->GetRight()->AsConstant()->IsArithmeticZero()) { + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // BELOW lhs, lhs + // CONSTANT false + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0)); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (instruction->GetRight()->IsConstant() && + instruction->GetRight()->AsConstant()->IsArithmeticZero()) { // Replace code looking like // BELOW dst, src, 0 // unsigned src < 0 is always false // with @@ -205,8 +412,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitBelow(HBelow* instruction) { } void InstructionWithAbsorbingInputSimplifier::VisitBelowOrEqual(HBelowOrEqual* instruction) { - if (instruction->GetLeft()->IsConstant() && - instruction->GetLeft()->AsConstant()->IsArithmeticZero()) { + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // BELOW_OR_EQUAL lhs, lhs + // CONSTANT true + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1)); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (instruction->GetLeft()->IsConstant() && + instruction->GetLeft()->AsConstant()->IsArithmeticZero()) { // Replace code looking like // BELOW_OR_EQUAL dst, 0, src // unsigned 0 <= src is always true // 
with @@ -216,6 +429,55 @@ void InstructionWithAbsorbingInputSimplifier::VisitBelowOrEqual(HBelowOrEqual* i } } +void InstructionWithAbsorbingInputSimplifier::VisitGreaterThan(HGreaterThan* instruction) { + if (instruction->GetLeft() == instruction->GetRight() && + (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) || + instruction->IsLtBias())) { + // Replace code looking like + // GREATER_THAN lhs, lhs + // CONSTANT false + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0)); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitGreaterThanOrEqual( + HGreaterThanOrEqual* instruction) { + if (instruction->GetLeft() == instruction->GetRight() && + (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) || + instruction->IsGtBias())) { + // Replace code looking like + // GREATER_THAN_OR_EQUAL lhs, lhs + // CONSTANT true + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1)); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitLessThan(HLessThan* instruction) { + if (instruction->GetLeft() == instruction->GetRight() && + (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) || + instruction->IsGtBias())) { + // Replace code looking like + // LESS_THAN lhs, lhs + // CONSTANT false + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0)); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitLessThanOrEqual(HLessThanOrEqual* instruction) { + if (instruction->GetLeft() == instruction->GetRight() && + (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) || + instruction->IsLtBias())) { + // Replace code looking like + // LESS_THAN_OR_EQUAL lhs, lhs + // CONSTANT true + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1)); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) { DataType::Type type = instruction->GetType(); HConstant* input_cst = instruction->GetConstantRight(); diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h index 72bd95b3cb..29648e907c 100644 --- a/compiler/optimizing/constant_folding.h +++ b/compiler/optimizing/constant_folding.h @@ -17,10 +17,12 @@ #ifndef ART_COMPILER_OPTIMIZING_CONSTANT_FOLDING_H_ #define ART_COMPILER_OPTIMIZING_CONSTANT_FOLDING_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" +#include "optimizing/optimizing_compiler_stats.h" -namespace art { +namespace art HIDDEN { /** * Optimization pass performing a simple constant-expression @@ -39,13 +41,20 @@ namespace art { */ class HConstantFolding : public HOptimization { public: - HConstantFolding(HGraph* graph, const char* name) : HOptimization(graph, name) {} + HConstantFolding(HGraph* graph, + OptimizingCompilerStats* stats = nullptr, + const char* name = kConstantFoldingPassName, + bool use_all_optimizations = false) + : HOptimization(graph, name, stats), use_all_optimizations_(use_all_optimizations) {} bool Run() override; static constexpr const char* kConstantFoldingPassName = "constant_folding"; private: + // Use all optimizations without restrictions. 
+ bool use_all_optimizations_; + DISALLOW_COPY_AND_ASSIGN(HConstantFolding); }; diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 74d9d3a993..741fd3f822 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -17,6 +17,8 @@ #include <functional> #include "constant_folding.h" + +#include "base/macros.h" #include "dead_code_elimination.h" #include "driver/compiler_options.h" #include "graph_checker.h" @@ -25,12 +27,12 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { /** * Fixture class for the constant folding and dce tests. */ -class ConstantFoldingTest : public OptimizingUnitTest { +class ConstantFoldingTest : public CommonCompilerTest, public OptimizingUnitTestHelper { public: ConstantFoldingTest() : graph_(nullptr) { } @@ -58,7 +60,9 @@ class ConstantFoldingTest : public OptimizingUnitTest { std::string actual_before = printer_before.str(); EXPECT_EQ(expected_before, actual_before); - HConstantFolding(graph_, "constant_folding").Run(); + HConstantFolding constant_folding( + graph_, /* stats= */ nullptr, "constant_folding", /* use_all_optimizations= */ true); + constant_folding.Run(); GraphChecker graph_checker_cf(graph_); graph_checker_cf.Run(); ASSERT_TRUE(graph_checker_cf.IsValid()); diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc index 3a1a9e023d..d9b7652f32 100644 --- a/compiler/optimizing/constructor_fence_redundancy_elimination.cc +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc @@ -20,12 +20,12 @@ #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" -namespace art { +namespace art HIDDEN { static constexpr bool kCfreLogFenceInputCount = false; // TODO: refactor this code by reusing escape analysis. -class CFREVisitor : public HGraphVisitor { +class CFREVisitor final : public HGraphVisitor { public: CFREVisitor(HGraph* graph, OptimizingCompilerStats* stats) : HGraphVisitor(graph), @@ -147,16 +147,6 @@ class CFREVisitor : public HGraphVisitor { void VisitAlias(HInstruction* aliasing_inst) { // An object is considered "published" if it becomes aliased by other instructions. if (HasInterestingPublishTargetAsInput(aliasing_inst)) { - // Note that constructing a "NullCheck" for new-instance, new-array, - // or a 'this' (receiver) reference is impossible. - // - // If by some reason we actually encounter such a NullCheck(FenceTarget), - // we LOG(WARNING). - if (UNLIKELY(aliasing_inst->IsNullCheck())) { - LOG(kIsDebugBuild ? FATAL : WARNING) - << "Unexpected instruction: NullCheck; should not be legal in graph"; - // We then do a best-effort to handle this case. - } MergeCandidateFences(); } } diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.h b/compiler/optimizing/constructor_fence_redundancy_elimination.h index 014b342258..e04b986171 100644 --- a/compiler/optimizing/constructor_fence_redundancy_elimination.h +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_ #define ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_ +#include "base/macros.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { /* * Constructor Fence Redundancy Elimination (CFRE). 
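Stepping back to the HConstantFoldingVisitor::VisitIf change introduced in constant_folding.cc above, its source-level effect is roughly the following hypothetical sketch (Before/After are invented names; the pass rewrites uses in the HIR rather than source code):

// Before: `x` is used inside the branch even though its value is known there.
int Before(int x) {
  if (x == 42) {
    return x + 1;   // x is guaranteed to be 42 in this branch
  }
  return 0;
}

// After propagation: uses dominated by the true successor are replaced by the constant,
// which can enable further folding (here, 42 + 1 becomes 43).
int After(int x) {
  if (x == 42) {
    return 43;
  }
  return 0;
}
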
diff --git a/compiler/optimizing/critical_native_abi_fixup_arm.cc b/compiler/optimizing/critical_native_abi_fixup_arm.cc index 3c4db4bca7..77e156608b 100644 --- a/compiler/optimizing/critical_native_abi_fixup_arm.cc +++ b/compiler/optimizing/critical_native_abi_fixup_arm.cc @@ -23,7 +23,7 @@ #include "scoped_thread_state_change-inl.h" #include "well_known_classes.h" -namespace art { +namespace art HIDDEN { namespace arm { // Fix up FP arguments passed in core registers for call to @CriticalNative by inserting fake calls @@ -45,9 +45,9 @@ static void FixUpArguments(HInvokeStaticOrDirect* invoke) { if (DataType::IsFloatingPointType(input_type)) { bool is_double = (input_type == DataType::Type::kFloat64); DataType::Type converted_type = is_double ? DataType::Type::kInt64 : DataType::Type::kInt32; - jmethodID known_method = is_double ? WellKnownClasses::java_lang_Double_doubleToRawLongBits - : WellKnownClasses::java_lang_Float_floatToRawIntBits; - ArtMethod* resolved_method = jni::DecodeArtMethod(known_method); + ArtMethod* resolved_method = is_double + ? WellKnownClasses::java_lang_Double_doubleToRawLongBits + : WellKnownClasses::java_lang_Float_floatToRawIntBits; DCHECK(resolved_method != nullptr); DCHECK(resolved_method->IsIntrinsic()); MethodReference target_method(nullptr, 0); @@ -74,7 +74,8 @@ static void FixUpArguments(HInvokeStaticOrDirect* invoke) { dispatch_info, kStatic, target_method, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !block->GetGraph()->IsDebuggable()); // The intrinsic has no side effects and does not need environment or dex cache on ARM. new_input->SetSideEffects(SideEffects::None()); IntrinsicOptimizations opt(new_input); diff --git a/compiler/optimizing/critical_native_abi_fixup_arm.h b/compiler/optimizing/critical_native_abi_fixup_arm.h index faa3c7a5fe..c2068f5e2d 100644 --- a/compiler/optimizing/critical_native_abi_fixup_arm.h +++ b/compiler/optimizing/critical_native_abi_fixup_arm.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_CRITICAL_NATIVE_ABI_FIXUP_ARM_H_ #define ART_COMPILER_OPTIMIZING_CRITICAL_NATIVE_ABI_FIXUP_ARM_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { namespace arm { class CriticalNativeAbiFixupArm : public HOptimization { diff --git a/compiler/optimizing/data_type-inl.h b/compiler/optimizing/data_type-inl.h index 1b33b775da..bbfe90451b 100644 --- a/compiler/optimizing/data_type-inl.h +++ b/compiler/optimizing/data_type-inl.h @@ -20,7 +20,7 @@ #include "data_type.h" #include "dex/primitive.h" -namespace art { +namespace art HIDDEN { // Note: Not declared in data_type.h to avoid pulling in "primitive.h". 
constexpr DataType::Type DataTypeFromPrimitive(Primitive::Type type) { diff --git a/compiler/optimizing/data_type.cc b/compiler/optimizing/data_type.cc index cb354f46cc..183cf2c622 100644 --- a/compiler/optimizing/data_type.cc +++ b/compiler/optimizing/data_type.cc @@ -16,7 +16,7 @@ #include "data_type.h" -namespace art { +namespace art HIDDEN { static const char* kTypeNames[] = { "Reference", diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h index ec6ca7accb..b6d9519150 100644 --- a/compiler/optimizing/data_type.h +++ b/compiler/optimizing/data_type.h @@ -22,8 +22,9 @@ #include <android-base/logging.h> #include "base/bit_utils.h" +#include "base/macros.h" -namespace art { +namespace art HIDDEN { class DataType { public: diff --git a/compiler/optimizing/data_type_test.cc b/compiler/optimizing/data_type_test.cc index 8fea22bce8..f6f614d8c4 100644 --- a/compiler/optimizing/data_type_test.cc +++ b/compiler/optimizing/data_type_test.cc @@ -22,7 +22,7 @@ #include "base/macros.h" #include "dex/primitive.h" -namespace art { +namespace art HIDDEN { template <DataType::Type data_type, Primitive::Type primitive_type> static void CheckConversion() { diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index d808f2ca3a..cf49e39849 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -16,14 +16,17 @@ #include "dead_code_elimination.h" +#include "android-base/logging.h" #include "base/array_ref.h" #include "base/bit_vector-inl.h" +#include "base/logging.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "base/stl_util.h" +#include "optimizing/nodes.h" #include "ssa_phi_elimination.h" -namespace art { +namespace art HIDDEN { static void MarkReachableBlocks(HGraph* graph, ArenaBitVector* visited) { // Use local allocator for allocating memory. @@ -178,6 +181,13 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr } else if (!cond->InputAt(0)->IsNullConstant()) { return false; } + + // We can't create a BoundType for an object with an invalid RTI. + const ReferenceTypeInfo ti = obj->GetReferenceTypeInfo(); + if (!ti.IsValid()) { + return false; + } + // Scan all uses of obj and find null check under control dependence. HBoundType* bound = nullptr; const HUseList<HInstruction*>& uses = obj->GetUses(); @@ -190,7 +200,6 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr user_block != throws && block->Dominates(user_block)) { if (bound == nullptr) { - ReferenceTypeInfo ti = obj->GetReferenceTypeInfo(); bound = new (obj->GetBlock()->GetGraph()->GetAllocator()) HBoundType(obj); bound->SetUpperBound(ti, /*can_be_null*/ false); bound->SetReferenceTypeInfo(ti); @@ -213,6 +222,9 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr // | ... // | instr_n // | foo() // always throws +// | instr_n+2 +// | ... +// | instr_n+m // \ goto B2 // \ / // B2 @@ -230,11 +242,14 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr // B2 Exit // // Rationale: -// Removal of the never taken edge to B2 may expose -// other optimization opportunities, such as code sinking. +// Removal of the never taken edge to B2 may expose other optimization opportunities, such as code +// sinking. +// +// Note: The example above is a simple one that uses a `goto` but we could end the block with an If, +// for example. 
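Before the implementation below, a hypothetical source-level sketch of the pattern SimplifyAlwaysThrows rewires (AlwaysThrows/Caller are invented names): everything after a call that always throws is unreachable, so the block is split at the call and redirected to the exit block.

#include <stdexcept>

[[noreturn]] void AlwaysThrows() {
  throw std::runtime_error("always");
}

int Caller(int x) {
  if (x < 0) {
    AlwaysThrows();
    x += 1;          // unreachable: the block is split right after the call
  }
  return x;          // the never-taken edge from the throwing block to this merge is removed
}
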
bool HDeadCodeElimination::SimplifyAlwaysThrows() { HBasicBlock* exit = graph_->GetExitBlock(); - if (exit == nullptr) { + if (!graph_->HasAlwaysThrowingInvokes() || exit == nullptr) { return false; } @@ -242,54 +257,55 @@ bool HDeadCodeElimination::SimplifyAlwaysThrows() { // Order does not matter, just pick one. for (HBasicBlock* block : graph_->GetReversePostOrder()) { - if (block->GetTryCatchInformation() != nullptr) { + if (block->IsTryBlock()) { // We don't want to perform the simplify always throws optimizations for throws inside of - // tries since those throws might not go to the exit block. We do that by checking the - // TryCatchInformation of the blocks. - // - // As a special case the `catch_block` is the first block of the catch and it has - // TryCatchInformation. Other blocks in the catch don't have try catch information (as long as - // they are not part of an outer try). Knowing if a `catch_block` is part of an outer try is - // possible by checking its successors, but other restrictions of the simplify always throws - // optimization will block `catch_block` nevertheless (e.g. only one predecessor) so it is not - // worth the effort. - - // TODO(solanes): Maybe we can do a `goto catch` if inside of a try catch instead of going to - // the exit. If we do so, we have to take into account that we should go to the nearest valid - // catch i.e. one that would accept our exception type. + // tries since those throws might not go to the exit block. continue; } - HInstruction* last = block->GetLastInstruction(); - HInstruction* prev = last->GetPrevious(); - if (prev == nullptr) { - DCHECK_EQ(block->GetFirstInstruction(), block->GetLastInstruction()); + // We iterate to find the first instruction that always throws. If two instructions always + // throw, the first one will throw and the second one will never be reached. + HInstruction* throwing_invoke = nullptr; + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + if (it.Current()->IsInvoke() && it.Current()->AsInvoke()->AlwaysThrows()) { + throwing_invoke = it.Current(); + break; + } + } + + if (throwing_invoke == nullptr) { + // No always-throwing instruction found. Continue with the rest of the blocks. continue; } - if (prev->AlwaysThrows() && - last->IsGoto() && - block->GetPhis().IsEmpty() && - block->GetPredecessors().size() == 1u) { - HBasicBlock* pred = block->GetSinglePredecessor(); - HBasicBlock* succ = block->GetSingleSuccessor(); - // Ensure no computations are merged through throwing block. - // This does not prevent the optimization per se, but would - // require an elaborate clean up of the SSA graph. - if (succ != exit && - !block->Dominates(pred) && - pred->Dominates(succ) && - succ->GetPredecessors().size() > 1u && - succ->GetPhis().IsEmpty()) { - block->ReplaceSuccessor(succ, exit); - rerun_dominance_and_loop_analysis = true; - MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke); - // Perform a quick follow up optimization on object != null control dependences - // that is much cheaper to perform now than in a later phase. - if (RemoveNonNullControlDependences(pred, block)) { - MaybeRecordStat(stats_, MethodCompilationStat::kRemovedNullCheck); - } - } + // If we are already pointing at the exit block we could still remove the instructions + // between the always throwing instruction, and the exit block. If we have no other + // instructions, just continue since there's nothing to do. 
+ if (block->GetSuccessors().size() == 1 && + block->GetSingleSuccessor() == exit && + block->GetLastInstruction()->GetPrevious() == throwing_invoke) { + continue; + } + + // We split the block at the throwing instruction, and the instructions after the throwing + // instructions will be disconnected from the graph after `block` points to the exit. + // `RemoveDeadBlocks` will take care of removing this new block and its instructions. + // Even though `SplitBefore` doesn't guarantee the graph to remain in SSA form, it is fine + // since we do not break it. + HBasicBlock* new_block = block->SplitBefore(throwing_invoke->GetNext(), + /* require_graph_not_in_ssa_form= */ false); + DCHECK_EQ(block->GetSingleSuccessor(), new_block); + block->ReplaceSuccessor(new_block, exit); + + rerun_dominance_and_loop_analysis = true; + MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke); + // Perform a quick follow up optimization on object != null control dependences + // that is much cheaper to perform now than in a later phase. + // If there are multiple predecessors, none may end with a HIf as required in + // RemoveNonNullControlDependences because we split critical edges. + if (block->GetPredecessors().size() == 1u && + RemoveNonNullControlDependences(block->GetSinglePredecessor(), block)) { + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedNullCheck); } } @@ -303,54 +319,45 @@ bool HDeadCodeElimination::SimplifyAlwaysThrows() { return false; } -// Simplify the pattern: -// -// B1 B2 ... -// goto goto goto -// \ | / -// \ | / -// B3 -// i1 = phi(input, input) -// (i2 = condition on i1) -// if i1 (or i2) -// / \ -// / \ -// B4 B5 -// -// Into: -// -// B1 B2 ... -// | | | -// B4 B5 B? -// -// Note that individual edges can be redirected (for example B2->B3 -// can be redirected as B2->B5) without applying this optimization -// to other incoming edges. -// -// This simplification cannot be applied to catch blocks, because -// exception handler edges do not represent normal control flow. -// Though in theory this could still apply to normal control flow -// going directly to a catch block, we cannot support it at the -// moment because the catch Phi's inputs do not correspond to the -// catch block's predecessors, so we cannot identify which -// predecessor corresponds to a given statically evaluated input. -// -// We do not apply this optimization to loop headers as this could -// create irreducible loops. We rely on the suspend check in the -// loop header to prevent the pattern match. -// -// Note that we rely on the dead code elimination to get rid of B3. bool HDeadCodeElimination::SimplifyIfs() { bool simplified_one_or_more_ifs = false; bool rerun_dominance_and_loop_analysis = false; - for (HBasicBlock* block : graph_->GetReversePostOrder()) { + // Iterating in PostOrder it's better for MaybeAddPhi as it can add a Phi for multiple If + // instructions in a chain without updating the dominator chain. The branch redirection itself can + // work in PostOrder or ReversePostOrder without issues. + for (HBasicBlock* block : graph_->GetPostOrder()) { + if (block->IsCatchBlock()) { + // This simplification cannot be applied to catch blocks, because exception handler edges do + // not represent normal control flow. 
Though in theory this could still apply to normal + // control flow going directly to a catch block, we cannot support it at the moment because + // the catch Phi's inputs do not correspond to the catch block's predecessors, so we cannot + // identify which predecessor corresponds to a given statically evaluated input. + continue; + } + HInstruction* last = block->GetLastInstruction(); - HInstruction* first = block->GetFirstInstruction(); - if (!block->IsCatchBlock() && - last->IsIf() && - block->HasSinglePhi() && + if (!last->IsIf()) { + continue; + } + + if (block->IsLoopHeader()) { + // We do not apply this optimization to loop headers as this could create irreducible loops. + continue; + } + + // We will add a Phi which allows the simplification to take place in cases where it wouldn't. + MaybeAddPhi(block); + + // TODO(solanes): Investigate support for multiple phis in `block`. We can potentially "push + // downwards" existing Phis into the true/false branches. For example, let's say we have another + // Phi: Phi(x1,x2,x3,x4,x5,x6). This could turn into Phi(x1,x2) in the true branch, Phi(x3,x4) + // in the false branch, and remain as Phi(x5,x6) in `block` (for edges that we couldn't + // redirect). We might even be able to remove some phis altogether as they will have only one + // value. + if (block->HasSinglePhi() && block->GetFirstPhi()->HasOnlyOneNonEnvironmentUse()) { + HInstruction* first = block->GetFirstInstruction(); bool has_only_phi_and_if = (last == first) && (last->InputAt(0) == block->GetFirstPhi()); bool has_only_phi_condition_and_if = !has_only_phi_and_if && @@ -361,7 +368,6 @@ bool HDeadCodeElimination::SimplifyIfs() { first->HasOnlyOneNonEnvironmentUse(); if (has_only_phi_and_if || has_only_phi_condition_and_if) { - DCHECK(!block->IsLoopHeader()); HPhi* phi = block->GetFirstPhi()->AsPhi(); bool phi_input_is_left = (first->InputAt(0) == phi); @@ -446,6 +452,125 @@ bool HDeadCodeElimination::SimplifyIfs() { return simplified_one_or_more_ifs; } +void HDeadCodeElimination::MaybeAddPhi(HBasicBlock* block) { + DCHECK(block->GetLastInstruction()->IsIf()); + HIf* if_instruction = block->GetLastInstruction()->AsIf(); + if (if_instruction->InputAt(0)->IsConstant()) { + // Constant values are handled in RemoveDeadBlocks. + return; + } + + if (block->GetNumberOfPredecessors() < 2u) { + // Nothing to redirect. + return; + } + + if (!block->GetPhis().IsEmpty()) { + // SimplifyIf doesn't currently work with multiple phis. Adding a phi here won't help that + // optimization. + return; + } + + HBasicBlock* dominator = block->GetDominator(); + if (!dominator->EndsWithIf()) { + return; + } + + HInstruction* input = if_instruction->InputAt(0); + HInstruction* dominator_input = dominator->GetLastInstruction()->AsIf()->InputAt(0); + const bool same_input = dominator_input == input; + if (!same_input) { + // Try to see if the dominator has the opposite input (e.g. if(cond) and if(!cond)). If that's + // the case, we can perform the optimization with the false and true branches reversed. + if (!dominator_input->IsCondition() || !input->IsCondition()) { + return; + } + + HCondition* block_cond = input->AsCondition(); + HCondition* dominator_cond = dominator_input->AsCondition(); + + if (block_cond->GetLeft() != dominator_cond->GetLeft() || + block_cond->GetRight() != dominator_cond->GetRight() || + block_cond->GetOppositeCondition() != dominator_cond->GetCondition()) { + return; + } + } + + if (kIsDebugBuild) { + // `block`'s successors should have only one predecessor. 
Otherwise, we have a critical edge in + // the graph. + for (HBasicBlock* succ : block->GetSuccessors()) { + DCHECK_EQ(succ->GetNumberOfPredecessors(), 1u); + } + } + + const size_t pred_size = block->GetNumberOfPredecessors(); + HPhi* new_phi = new (graph_->GetAllocator()) + HPhi(graph_->GetAllocator(), kNoRegNumber, pred_size, DataType::Type::kInt32); + + for (size_t index = 0; index < pred_size; index++) { + HBasicBlock* pred = block->GetPredecessors()[index]; + const bool dominated_by_true = + dominator->GetLastInstruction()->AsIf()->IfTrueSuccessor()->Dominates(pred); + const bool dominated_by_false = + dominator->GetLastInstruction()->AsIf()->IfFalseSuccessor()->Dominates(pred); + if (dominated_by_true == dominated_by_false) { + // In this case, we can't know if we are coming from the true branch, or the false branch. It + // happens in cases like: + // 1 (outer if) + // / \ + // 2 3 (inner if) + // | / \ + // | 4 5 + // \/ | + // 6 | + // \ | + // 7 (has the same if(cond) as 1) + // | + // 8 + // `7` (which would be `block` in this example), and `6` will come from both the true path and + // the false path of `1`. We bumped into something similar in SelectGenerator. See + // HSelectGenerator::TryFixupDoubleDiamondPattern. + // TODO(solanes): Figure out if we can fix up the graph into a double diamond in a generic way + // so that DeadCodeElimination and SelectGenerator can take advantage of it. + + if (!same_input) { + // `1` and `7` having the opposite condition is a case we are missing. We could potentially + // add a BooleanNot instruction to be able to add the Phi, but it seems like overkill since + // this case is not that common. + return; + } + + // The Phi will have `0`, `1`, and `cond` as inputs. If SimplifyIf redirects 0s and 1s, we + // will end up with Phi(cond,...,cond) which will be replaced by `cond`. Effectively, we will + // redirect edges that we are able to redirect and the rest will remain as before (i.e. we + // won't have an extra Phi). + new_phi->SetRawInputAt(index, input); + } else { + // Redirect to either the true branch (1), or the false branch (0). + // Given that `dominated_by_true` is the exact opposite of `dominated_by_false`, + // `(same_input && dominated_by_true) || (!same_input && dominated_by_false)` is equivalent to + // `same_input == dominated_by_true`. + new_phi->SetRawInputAt( + index, + same_input == dominated_by_true ? graph_->GetIntConstant(1) : graph_->GetIntConstant(0)); + } + } + + block->AddPhi(new_phi); + if_instruction->ReplaceInput(new_phi, 0); + + // Remove the old input now, if possible. This allows the branch redirection in SimplifyIf to + // work without waiting for another pass of DCE. + if (input->IsDeadAndRemovable()) { + DCHECK(!same_input) + << " if both blocks have the same condition, it shouldn't be dead and removable since the " + << "dominator block's If instruction would be using that condition."; + input->GetBlock()->RemoveInstruction(input); + } + MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyIfAddedPhi); +} + void HDeadCodeElimination::ConnectSuccessiveBlocks() { // Order does not matter. Skip the entry block by starting at index 1 in reverse post order. 
for (size_t i = 1u, size = graph_->GetReversePostOrder().size(); i != size; ++i) { @@ -466,7 +591,192 @@ void HDeadCodeElimination::ConnectSuccessiveBlocks() { } } -bool HDeadCodeElimination::RemoveDeadBlocks() { +struct HDeadCodeElimination::TryBelongingInformation { + explicit TryBelongingInformation(ScopedArenaAllocator* allocator) + : blocks_in_try(allocator->Adapter(kArenaAllocDCE)), + coalesced_try_entries(allocator->Adapter(kArenaAllocDCE)) {} + + // Which blocks belong in the try. + ScopedArenaSet<HBasicBlock*> blocks_in_try; + // Which other try entries are referencing this same try. + ScopedArenaSet<HBasicBlock*> coalesced_try_entries; +}; + +bool HDeadCodeElimination::CanPerformTryRemoval(const TryBelongingInformation& try_belonging_info) { + for (HBasicBlock* block : try_belonging_info.blocks_in_try) { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + if (it.Current()->CanThrow()) { + return false; + } + } + } + return true; +} + +void HDeadCodeElimination::DisconnectHandlersAndUpdateTryBoundary( + HBasicBlock* block, + /* out */ bool* any_block_in_loop) { + if (block->IsInLoop()) { + *any_block_in_loop = true; + } + + // Disconnect the handlers. + while (block->GetSuccessors().size() > 1) { + HBasicBlock* handler = block->GetSuccessors()[1]; + DCHECK(handler->IsCatchBlock()); + block->RemoveSuccessor(handler); + handler->RemovePredecessor(block); + if (handler->IsInLoop()) { + *any_block_in_loop = true; + } + } + + // Change TryBoundary to Goto. + DCHECK(block->EndsWithTryBoundary()); + HInstruction* last = block->GetLastInstruction(); + block->RemoveInstruction(last); + block->AddInstruction(new (graph_->GetAllocator()) HGoto(last->GetDexPc())); + DCHECK_EQ(block->GetSuccessors().size(), 1u); +} + +void HDeadCodeElimination::RemoveTry(HBasicBlock* try_entry, + const TryBelongingInformation& try_belonging_info, + /* out */ bool* any_block_in_loop) { + // Update all try entries. + DCHECK(try_entry->EndsWithTryBoundary()); + DCHECK(try_entry->GetLastInstruction()->AsTryBoundary()->IsEntry()); + DisconnectHandlersAndUpdateTryBoundary(try_entry, any_block_in_loop); + + for (HBasicBlock* other_try_entry : try_belonging_info.coalesced_try_entries) { + DCHECK(other_try_entry->EndsWithTryBoundary()); + DCHECK(other_try_entry->GetLastInstruction()->AsTryBoundary()->IsEntry()); + DisconnectHandlersAndUpdateTryBoundary(other_try_entry, any_block_in_loop); + } + + // Update the blocks in the try. + for (HBasicBlock* block : try_belonging_info.blocks_in_try) { + // Update the try catch information since now the try doesn't exist. + block->SetTryCatchInformation(nullptr); + if (block->IsInLoop()) { + *any_block_in_loop = true; + } + + if (block->EndsWithTryBoundary()) { + // Try exits. + DCHECK(!block->GetLastInstruction()->AsTryBoundary()->IsEntry()); + DisconnectHandlersAndUpdateTryBoundary(block, any_block_in_loop); + + if (block->GetSingleSuccessor()->IsExitBlock()) { + // `block` used to be a single exit TryBoundary that got turned into a Goto. It + // is now pointing to the exit which we don't allow. To fix it, we disconnect + // `block` from its predecessor and RemoveDeadBlocks will remove it from the + // graph. + DCHECK(block->IsSingleGoto()); + HBasicBlock* predecessor = block->GetSinglePredecessor(); + predecessor->ReplaceSuccessor(block, graph_->GetExitBlock()); + + if (!block->GetDominatedBlocks().empty()) { + // Update domination tree if `block` dominates a block to keep the graph consistent. 
+ DCHECK_EQ(block->GetDominatedBlocks().size(), 1u); + DCHECK_EQ(graph_->GetExitBlock()->GetDominator(), block); + predecessor->AddDominatedBlock(graph_->GetExitBlock()); + graph_->GetExitBlock()->SetDominator(predecessor); + block->RemoveDominatedBlock(graph_->GetExitBlock()); + } + } + } + } +} + +bool HDeadCodeElimination::RemoveUnneededTries() { + if (!graph_->HasTryCatch()) { + return false; + } + + // Use local allocator for allocating memory. + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + + // Collect which blocks are part of which try. + std::unordered_map<HBasicBlock*, TryBelongingInformation> tries; + for (HBasicBlock* block : graph_->GetReversePostOrderSkipEntryBlock()) { + if (block->IsTryBlock()) { + HBasicBlock* key = block->GetTryCatchInformation()->GetTryEntry().GetBlock(); + auto it = tries.find(key); + if (it == tries.end()) { + it = tries.insert({key, TryBelongingInformation(&allocator)}).first; + } + it->second.blocks_in_try.insert(block); + } + } + + // Deduplicate the tries which have different try entries but they are really the same try. + for (auto it = tries.begin(); it != tries.end(); it++) { + DCHECK(it->first->EndsWithTryBoundary()); + HTryBoundary* try_boundary = it->first->GetLastInstruction()->AsTryBoundary(); + for (auto other_it = next(it); other_it != tries.end(); /*other_it++ in the loop*/) { + DCHECK(other_it->first->EndsWithTryBoundary()); + HTryBoundary* other_try_boundary = other_it->first->GetLastInstruction()->AsTryBoundary(); + if (try_boundary->HasSameExceptionHandlersAs(*other_try_boundary)) { + // Merge the entries as they are really the same one. + // Block merging. + it->second.blocks_in_try.insert(other_it->second.blocks_in_try.begin(), + other_it->second.blocks_in_try.end()); + + // Add the coalesced try entry to update it too. + it->second.coalesced_try_entries.insert(other_it->first); + + // Erase the other entry. + other_it = tries.erase(other_it); + } else { + other_it++; + } + } + } + + size_t removed_tries = 0; + bool any_block_in_loop = false; + + // Check which tries contain throwing instructions. + for (const auto& entry : tries) { + if (CanPerformTryRemoval(entry.second)) { + ++removed_tries; + RemoveTry(entry.first, entry.second, &any_block_in_loop); + } + } + + if (removed_tries != 0) { + // We want to: + // 1) Update the dominance information + // 2) Remove catch block subtrees, if they are now unreachable. + // If we run the dominance recomputation without removing the code, those catch blocks will + // not be part of the post order and won't be removed. If we don't run the dominance + // recomputation, we risk RemoveDeadBlocks not running it and leaving the graph in an + // inconsistent state. So, what we can do is run RemoveDeadBlocks and force a recomputation. + // Note that we are not guaranteed to remove a catch block if we have nested try blocks: + // + // try { + // ... nothing can throw. TryBoundary A ... + // try { + // ... can throw. TryBoundary B... + // } catch (Error e) {} + // } catch (Exception e) {} + // + // In the example above, we can remove the TryBoundary A but the Exception catch cannot be + // removed as the TryBoundary B might still throw into that catch. TryBoundary A and B don't get + // coalesced since they have different catch handlers. 
+ + RemoveDeadBlocks(/* force_recomputation= */ true, any_block_in_loop); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedTry, removed_tries); + return true; + } else { + return false; + } +} + +bool HDeadCodeElimination::RemoveDeadBlocks(bool force_recomputation, + bool force_loop_recomputation) { + DCHECK_IMPLIES(force_loop_recomputation, force_recomputation); + // Use local allocator for allocating memory. ScopedArenaAllocator allocator(graph_->GetArenaStack()); @@ -495,8 +805,8 @@ bool HDeadCodeElimination::RemoveDeadBlocks() { // If we removed at least one block, we need to recompute the full // dominator tree and try block membership. - if (removed_one_or_more_blocks) { - if (rerun_dominance_and_loop_analysis) { + if (removed_one_or_more_blocks || force_recomputation) { + if (rerun_dominance_and_loop_analysis || force_loop_recomputation) { graph_->ClearLoopInformation(); graph_->ClearDominanceInformation(); graph_->BuildDominatorTree(); @@ -530,6 +840,33 @@ void HDeadCodeElimination::RemoveDeadInstructions() { } } +void HDeadCodeElimination::UpdateGraphFlags() { + bool has_monitor_operations = false; + bool has_simd = false; + bool has_bounds_checks = false; + bool has_always_throwing_invokes = false; + + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsMonitorOperation()) { + has_monitor_operations = true; + } else if (instruction->IsVecOperation()) { + has_simd = true; + } else if (instruction->IsBoundsCheck()) { + has_bounds_checks = true; + } else if (instruction->IsInvoke() && instruction->AsInvoke()->AlwaysThrows()) { + has_always_throwing_invokes = true; + } + } + } + + graph_->SetHasMonitorOperations(has_monitor_operations); + graph_->SetHasSIMD(has_simd); + graph_->SetHasBoundsChecks(has_bounds_checks); + graph_->SetHasAlwaysThrowingInvokes(has_always_throwing_invokes); +} + bool HDeadCodeElimination::Run() { // Do not eliminate dead blocks if the graph has irreducible loops. We could // support it, but that would require changes in our loop representation to handle @@ -541,6 +878,11 @@ bool HDeadCodeElimination::Run() { did_any_simplification |= SimplifyAlwaysThrows(); did_any_simplification |= SimplifyIfs(); did_any_simplification |= RemoveDeadBlocks(); + // We call RemoveDeadBlocks before RemoveUnneededTries to remove the dead blocks from the + // previous optimizations. Otherwise, we might detect that a try has throwing instructions but + // they are actually dead code. RemoveUnneededTryBoundary will call RemoveDeadBlocks again if + // needed. + did_any_simplification |= RemoveUnneededTries(); if (did_any_simplification) { // Connect successive blocks created by dead branches. 
ConnectSuccessiveBlocks(); @@ -548,6 +890,7 @@ bool HDeadCodeElimination::Run() { } SsaRedundantPhiElimination(graph_).Run(); RemoveDeadInstructions(); + UpdateGraphFlags(); return true; } diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 799721acf2..ddd01f7103 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -17,11 +17,12 @@ #ifndef ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_ #define ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" #include "optimizing_compiler_stats.h" -namespace art { +namespace art HIDDEN { /** * Optimization pass performing dead code elimination (removal of @@ -39,11 +40,87 @@ class HDeadCodeElimination : public HOptimization { private: void MaybeRecordDeadBlock(HBasicBlock* block); void MaybeRecordSimplifyIf(); - bool RemoveDeadBlocks(); + // If `force_recomputation` is true, we will recompute the dominance information even when we + // didn't delete any blocks. `force_loop_recomputation` is similar but it also forces the loop + // information recomputation. + bool RemoveDeadBlocks(bool force_recomputation = false, bool force_loop_recomputation = false); void RemoveDeadInstructions(); bool SimplifyAlwaysThrows(); + // Simplify the pattern: + // + // B1 B2 ... + // goto goto goto + // \ | / + // \ | / + // B3 + // i1 = phi(input, input) + // (i2 = condition on i1) + // if i1 (or i2) + // / \ + // / \ + // B4 B5 + // + // Into: + // + // B1 B2 ... + // | | | + // B4 B5 B? + // + // Note that individual edges can be redirected (for example B2->B3 + // can be redirected as B2->B5) without applying this optimization + // to other incoming edges. + // + // Note that we rely on the dead code elimination to get rid of B3. bool SimplifyIfs(); void ConnectSuccessiveBlocks(); + // Updates the graph flags related to instructions (e.g. HasSIMD()) since we may have eliminated + // the relevant instructions. There's no need to update `SetHasTryCatch` since we do that in + // `ComputeTryBlockInformation`. Similarly with `HasLoops` and `HasIrreducibleLoops`: They are + // cleared in `ClearLoopInformation` and then set as true as part of `HLoopInformation::Populate`, + // if needed. + void UpdateGraphFlags(); + + // Helper struct to eliminate tries. + struct TryBelongingInformation; + // Disconnects `block`'s handlers and update its `TryBoundary` instruction to a `Goto`. + // Sets `any_block_in_loop` to true if any block is currently a loop to later update the loop + // information if needed. + void DisconnectHandlersAndUpdateTryBoundary(HBasicBlock* block, + /* out */ bool* any_block_in_loop); + // Returns true iff the try doesn't contain throwing instructions. + bool CanPerformTryRemoval(const TryBelongingInformation& try_belonging_info); + // Removes the try by disconnecting all try entries and exits from their handlers. Also updates + // the graph in the case that a `TryBoundary` instruction of kind `exit` has the Exit block as + // its successor. + void RemoveTry(HBasicBlock* try_entry, + const TryBelongingInformation& try_belonging_info, + bool* any_block_in_loop); + // Checks which tries (if any) are currently in the graph, coalesces the different try entries + // that are referencing the same try, and removes the tries which don't contain any throwing + // instructions. 
+ bool RemoveUnneededTries(); + + // Adds a phi in `block`, if `block` and its dominator have the same (or opposite) condition. + // For example it turns: + // if(cond) + // / \ + // B1 B2 + // \ / + // if(cond) + // / \ + // B3 B4 + // + // into: + // if(cond) + // / \ + // B1 B2 + // \ / + // if(Phi(1, 0)) + // / \ + // B3 B4 + // + // Following this, SimplifyIfs is able to connect B1->B3 and B2->B4 effectively skipping an if. + void MaybeAddPhi(HBasicBlock* block); DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination); }; diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index f5cd4dc27a..b789434add 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -16,6 +16,7 @@ #include "dead_code_elimination.h" +#include "base/macros.h" #include "driver/compiler_options.h" #include "graph_checker.h" #include "optimizing_unit_test.h" @@ -23,9 +24,9 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { -class DeadCodeEliminationTest : public OptimizingUnitTest { +class DeadCodeEliminationTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void TestCode(const std::vector<uint16_t>& data, const std::string& expected_before, diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 1d72ba116e..5f366ebcd9 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "dex/dex_instruction.h" #include "nodes.h" @@ -22,9 +23,9 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { -class OptimizerTest : public OptimizingUnitTest { +class OptimizerTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void TestCode(const std::vector<uint16_t>& data, const uint32_t* blocks, size_t blocks_length); }; diff --git a/compiler/optimizing/escape.cc b/compiler/optimizing/escape.cc index 617833c697..cebe94fd0d 100644 --- a/compiler/optimizing/escape.cc +++ b/compiler/optimizing/escape.cc @@ -18,7 +18,7 @@ #include "nodes.h" -namespace art { +namespace art HIDDEN { void VisitEscapes(HInstruction* reference, EscapeVisitor& escape_visitor) { // References not allocated in the method are intrinsically escaped. 
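For illustration only (not part of the patch): the MaybeAddPhi logic added in the dead_code_elimination.cc/.h hunks earlier in this diff picks, for each predecessor of `block`, the constant 1 when that predecessor is dominated by the dominator's true branch, the constant 0 when it is dominated by the false branch, and the condition itself when neither branch dominates it. The standalone sketch below models just that selection rule; the type and function names are hypothetical, and it glosses over the detail that the real pass gives up entirely when the two ifs use opposite conditions and some predecessor is reachable from both branches.

// Simplified, hypothetical model of MaybeAddPhi's per-predecessor input selection.
#include <cstdio>

enum class PhiInput { kTrueConstant, kFalseConstant, kCondition };

const char* Name(PhiInput input) {
  switch (input) {
    case PhiInput::kTrueConstant: return "1";
    case PhiInput::kFalseConstant: return "0";
    case PhiInput::kCondition: return "cond";
  }
  return "?";
}

// `same_input` is true when the block's if uses the same condition as its dominator's if,
// false when it uses the opposite condition (if(cond) vs. if(!cond)).
PhiInput SelectPhiInput(bool same_input, bool dominated_by_true, bool dominated_by_false) {
  if (dominated_by_true == dominated_by_false) {
    // The predecessor is reachable from both of the dominator's branches (or from neither),
    // so we cannot tell which way the condition went: keep the condition itself and leave
    // that edge un-redirected. (The real pass additionally bails out completely here when
    // the two ifs use opposite conditions.)
    return PhiInput::kCondition;
  }
  // Otherwise the predecessor pins down the condition's value: pick 1 when its path matches
  // the branch this if would take, 0 otherwise.
  return (same_input == dominated_by_true) ? PhiInput::kTrueConstant : PhiInput::kFalseConstant;
}

int main() {
  // Reached only through the dominator's true branch, same condition: the phi input is 1,
  // so SimplifyIfs can later redirect this edge straight to the true successor.
  std::printf("%s\n", Name(SelectPhiInput(true, true, false)));
  // Opposite conditions flip the choice.
  std::printf("%s\n", Name(SelectPhiInput(false, true, false)));
  // Reached from both branches: keep the condition, the edge stays as-is.
  std::printf("%s\n", Name(SelectPhiInput(true, true, true)));
  return 0;
}

With such a phi in place, SimplifyIfs can treat the 0/1 inputs like its existing statically evaluated inputs and redirect those incoming edges past the second if, which is exactly what the added kSimplifyIfAddedPhi stat tracks.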
diff --git a/compiler/optimizing/escape.h b/compiler/optimizing/escape.h index 5402cb1763..3b284fbf43 100644 --- a/compiler/optimizing/escape.h +++ b/compiler/optimizing/escape.h @@ -17,7 +17,9 @@ #ifndef ART_COMPILER_OPTIMIZING_ESCAPE_H_ #define ART_COMPILER_OPTIMIZING_ESCAPE_H_ -namespace art { +#include "base/macros.h" + +namespace art HIDDEN { class HInstruction; diff --git a/compiler/optimizing/execution_subgraph.cc b/compiler/optimizing/execution_subgraph.cc index 66fdfcda5b..06aabbe040 100644 --- a/compiler/optimizing/execution_subgraph.cc +++ b/compiler/optimizing/execution_subgraph.cc @@ -26,7 +26,7 @@ #include "base/scoped_arena_allocator.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { ExecutionSubgraph::ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator) : graph_(graph), diff --git a/compiler/optimizing/execution_subgraph.h b/compiler/optimizing/execution_subgraph.h index 7d2a66077d..5ddf17de60 100644 --- a/compiler/optimizing/execution_subgraph.h +++ b/compiler/optimizing/execution_subgraph.h @@ -27,6 +27,7 @@ #include "base/bit_vector-inl.h" #include "base/globals.h" #include "base/iteration_range.h" +#include "base/macros.h" #include "base/mutex.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" @@ -34,7 +35,7 @@ #include "base/transform_iterator.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { // Helper for transforming blocks to block_ids. class BlockToBlockIdTransformer { diff --git a/compiler/optimizing/execution_subgraph_test.cc b/compiler/optimizing/execution_subgraph_test.cc index 74c243b5b4..921ef056ba 100644 --- a/compiler/optimizing/execution_subgraph_test.cc +++ b/compiler/optimizing/execution_subgraph_test.cc @@ -37,7 +37,7 @@ #include "optimizing_unit_test.h" #include "scoped_thread_state_change.h" -namespace art { +namespace art HIDDEN { using BlockSet = std::unordered_set<const HBasicBlock*>; diff --git a/compiler/optimizing/execution_subgraph_test.h b/compiler/optimizing/execution_subgraph_test.h index 13cb2bc7c5..cee105a045 100644 --- a/compiler/optimizing/execution_subgraph_test.h +++ b/compiler/optimizing/execution_subgraph_test.h @@ -19,7 +19,9 @@ #include "android-base/macros.h" -namespace art { +#include "base/macros.h" + +namespace art HIDDEN { class HGraph; class ExecutionSubgraph; diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index 75b8e9609e..8857b2a775 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" @@ -25,9 +26,9 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { -class FindLoopsTest : public OptimizingUnitTest {}; +class FindLoopsTest : public CommonCompilerTest, public OptimizingUnitTestHelper {}; TEST_F(FindLoopsTest, CFG1) { // Constant is not used. diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index d1769cea0d..190b362145 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -32,7 +32,7 @@ #include "scoped_thread_state_change-inl.h" #include "subtype_check.h" -namespace art { +namespace art HIDDEN { using android::base::StringPrintf; @@ -80,9 +80,91 @@ size_t GraphChecker::Run(bool pass_change, size_t last_size) { // as the latter might visit dead blocks removed by the dominator // computation. 
VisitReversePostOrder(); + CheckGraphFlags(); return current_size; } +void GraphChecker::VisitReversePostOrder() { + for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { + if (block->IsInLoop()) { + flag_info_.seen_loop = true; + if (block->GetLoopInformation()->IsIrreducible()) { + flag_info_.seen_irreducible_loop = true; + } + } + + VisitBasicBlock(block); + } +} + +static const char* StrBool(bool val) { + return val ? "true" : "false"; +} + +void GraphChecker::CheckGraphFlags() { + if (GetGraph()->HasMonitorOperations() != flag_info_.seen_monitor_operation) { + AddError( + StringPrintf("Flag mismatch: HasMonitorOperations() (%s) should be equal to " + "flag_info_.seen_monitor_operation (%s)", + StrBool(GetGraph()->HasMonitorOperations()), + StrBool(flag_info_.seen_monitor_operation))); + } + + if (GetGraph()->HasTryCatch() != flag_info_.seen_try_boundary) { + AddError( + StringPrintf("Flag mismatch: HasTryCatch() (%s) should be equal to " + "flag_info_.seen_try_boundary (%s)", + StrBool(GetGraph()->HasTryCatch()), + StrBool(flag_info_.seen_try_boundary))); + } + + if (GetGraph()->HasLoops() != flag_info_.seen_loop) { + AddError( + StringPrintf("Flag mismatch: HasLoops() (%s) should be equal to " + "flag_info_.seen_loop (%s)", + StrBool(GetGraph()->HasLoops()), + StrBool(flag_info_.seen_loop))); + } + + if (GetGraph()->HasIrreducibleLoops() && !GetGraph()->HasLoops()) { + AddError(StringPrintf("Flag mismatch: HasIrreducibleLoops() (%s) implies HasLoops() (%s)", + StrBool(GetGraph()->HasIrreducibleLoops()), + StrBool(GetGraph()->HasLoops()))); + } + + if (GetGraph()->HasIrreducibleLoops() != flag_info_.seen_irreducible_loop) { + AddError( + StringPrintf("Flag mismatch: HasIrreducibleLoops() (%s) should be equal to " + "flag_info_.seen_irreducible_loop (%s)", + StrBool(GetGraph()->HasIrreducibleLoops()), + StrBool(flag_info_.seen_irreducible_loop))); + } + + if (GetGraph()->HasSIMD() != flag_info_.seen_SIMD) { + AddError( + StringPrintf("Flag mismatch: HasSIMD() (%s) should be equal to " + "flag_info_.seen_SIMD (%s)", + StrBool(GetGraph()->HasSIMD()), + StrBool(flag_info_.seen_SIMD))); + } + + if (GetGraph()->HasBoundsChecks() != flag_info_.seen_bounds_checks) { + AddError( + StringPrintf("Flag mismatch: HasBoundsChecks() (%s) should be equal to " + "flag_info_.seen_bounds_checks (%s)", + StrBool(GetGraph()->HasBoundsChecks()), + StrBool(flag_info_.seen_bounds_checks))); + } + + if (GetGraph()->HasAlwaysThrowingInvokes() != flag_info_.seen_always_throwing_invokes) { + AddError( + StringPrintf("Flag mismatch: HasAlwaysThrowingInvokes() (%s) should be equal to " + "flag_info_.seen_always_throwing_invokes (%s)", + StrBool(GetGraph()->HasAlwaysThrowingInvokes()), + StrBool(flag_info_.seen_always_throwing_invokes))); + } +} + void GraphChecker::VisitBasicBlock(HBasicBlock* block) { current_block_ = block; @@ -159,6 +241,24 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { } } + // Make sure the first instruction of a catch block is always a Nop that emits an environment. 
+ if (block->IsCatchBlock()) { + if (!block->GetFirstInstruction()->IsNop()) { + AddError(StringPrintf("Block %d doesn't have a Nop as its first instruction.", + current_block_->GetBlockId())); + } else { + HNop* nop = block->GetFirstInstruction()->AsNop(); + if (!nop->NeedsEnvironment()) { + AddError( + StringPrintf("%s:%d is a Nop and the first instruction of block %d, but it doesn't " + "need an environment.", + nop->DebugName(), + nop->GetId(), + current_block_->GetBlockId())); + } + } + } + // Visit this block's list of phis. for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); @@ -219,6 +319,12 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { } } + // Ensure all blocks have at least one successor, except the Exit block. + if (block->GetSuccessors().empty() && !block->IsExitBlock()) { + AddError(StringPrintf("Block %d has no successor and it is not the Exit block.", + block->GetBlockId())); + } + // Ensure there is no critical edge (i.e., an edge connecting a // block with multiple successors to a block with multiple // predecessors). Exceptional edges are synthesized and hence @@ -291,27 +397,30 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { } void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) { + VisitInstruction(check); + if (!GetGraph()->HasBoundsChecks()) { - AddError(StringPrintf("Instruction %s:%d is a HBoundsCheck, " - "but HasBoundsChecks() returns false", - check->DebugName(), - check->GetId())); + AddError( + StringPrintf("The graph doesn't have the HasBoundsChecks flag set but we saw " + "%s:%d in block %d.", + check->DebugName(), + check->GetId(), + check->GetBlock()->GetBlockId())); } - // Perform the instruction base checks too. - VisitInstruction(check); + flag_info_.seen_bounds_checks = true; } void GraphChecker::VisitDeoptimize(HDeoptimize* deopt) { + VisitInstruction(deopt); if (GetGraph()->IsCompilingOsr()) { AddError(StringPrintf("A graph compiled OSR cannot have a HDeoptimize instruction")); } - - // Perform the instruction base checks too. - VisitInstruction(deopt); } void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) { + VisitInstruction(try_boundary); + ArrayRef<HBasicBlock* const> handlers = try_boundary->GetExceptionHandlers(); // Ensure that all exception handlers are catch blocks. @@ -338,24 +447,65 @@ void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) { } } - VisitInstruction(try_boundary); + if (!GetGraph()->HasTryCatch()) { + AddError( + StringPrintf("The graph doesn't have the HasTryCatch flag set but we saw " + "%s:%d in block %d.", + try_boundary->DebugName(), + try_boundary->GetId(), + try_boundary->GetBlock()->GetBlockId())); + } + + flag_info_.seen_try_boundary = true; +} + +void GraphChecker::VisitLoadClass(HLoadClass* load) { + VisitInstruction(load); + + if (load->GetLoadedClassRTI().IsValid() && !load->GetLoadedClassRTI().IsExact()) { + std::stringstream ssRTI; + ssRTI << load->GetLoadedClassRTI(); + AddError(StringPrintf("%s:%d in block %d with RTI %s has valid but inexact RTI.", + load->DebugName(), + load->GetId(), + load->GetBlock()->GetBlockId(), + ssRTI.str().c_str())); + } } void GraphChecker::VisitLoadException(HLoadException* load) { - // Ensure that LoadException is the first instruction in a catch block. + VisitInstruction(load); + + // Ensure that LoadException is the second instruction in a catch block. The first one should be a + // Nop (checked separately). 
if (!load->GetBlock()->IsCatchBlock()) { AddError(StringPrintf("%s:%d is in a non-catch block %d.", load->DebugName(), load->GetId(), load->GetBlock()->GetBlockId())); - } else if (load->GetBlock()->GetFirstInstruction() != load) { - AddError(StringPrintf("%s:%d is not the first instruction in catch block %d.", + } else if (load->GetBlock()->GetFirstInstruction()->GetNext() != load) { + AddError(StringPrintf("%s:%d is not the second instruction in catch block %d.", load->DebugName(), load->GetId(), load->GetBlock()->GetBlockId())); } } +void GraphChecker::VisitMonitorOperation(HMonitorOperation* monitor_op) { + VisitInstruction(monitor_op); + + if (!GetGraph()->HasMonitorOperations()) { + AddError( + StringPrintf("The graph doesn't have the HasMonitorOperations flag set but we saw " + "%s:%d in block %d.", + monitor_op->DebugName(), + monitor_op->GetId(), + monitor_op->GetBlock()->GetBlockId())); + } + + flag_info_.seen_monitor_operation = true; +} + void GraphChecker::VisitInstruction(HInstruction* instruction) { if (seen_ids_.IsBitSet(instruction->GetId())) { AddError(StringPrintf("Instruction id %d is duplicate in graph.", @@ -497,33 +647,16 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } } - // Ensure that reference type instructions have reference type info. - if (check_reference_type_info_ && instruction->GetType() == DataType::Type::kReference) { - if (!instruction->GetReferenceTypeInfo().IsValid()) { - AddError(StringPrintf("Reference type instruction %s:%d does not have " - "valid reference type information.", - instruction->DebugName(), - instruction->GetId())); - } - } - if (instruction->CanThrow() && !instruction->HasEnvironment()) { AddError(StringPrintf("Throwing instruction %s:%d in block %d does not have an environment.", instruction->DebugName(), instruction->GetId(), current_block_->GetBlockId())); } else if (instruction->CanThrowIntoCatchBlock()) { - // Find the top-level environment. This corresponds to the environment of - // the catch block since we do not inline methods with try/catch. - HEnvironment* environment = instruction->GetEnvironment(); - while (environment->GetParent() != nullptr) { - environment = environment->GetParent(); - } - - // Find all catch blocks and test that `instruction` has an environment - // value for each one. + // Find all catch blocks and test that `instruction` has an environment value for each one. 
const HTryBoundary& entry = instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry(); for (HBasicBlock* catch_block : entry.GetExceptionHandlers()) { + const HEnvironment* environment = catch_block->GetFirstInstruction()->GetEnvironment(); for (HInstructionIterator phi_it(catch_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { HPhi* catch_phi = phi_it.Current()->AsPhi(); if (environment->GetInstructionAt(catch_phi->GetRegNumber()) == nullptr) { @@ -541,9 +674,26 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } } -void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { +void GraphChecker::VisitInvoke(HInvoke* invoke) { VisitInstruction(invoke); + if (invoke->AlwaysThrows()) { + if (!GetGraph()->HasAlwaysThrowingInvokes()) { + AddError( + StringPrintf("The graph doesn't have the HasAlwaysThrowingInvokes flag set but we saw " + "%s:%d in block %d and it always throws.", + invoke->DebugName(), + invoke->GetId(), + invoke->GetBlock()->GetBlockId())); + } + flag_info_.seen_always_throwing_invokes = true; + } +} + +void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // We call VisitInvoke and not VisitInstruction to de-duplicate the always throwing code check. + VisitInvoke(invoke); + if (invoke->IsStaticWithExplicitClinitCheck()) { const HInstruction* last_input = invoke->GetInputs().back(); if (last_input == nullptr) { @@ -612,6 +762,17 @@ void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) { VisitInstruction(check); + + if (check->GetTargetClassRTI().IsValid() && !check->GetTargetClassRTI().IsExact()) { + std::stringstream ssRTI; + ssRTI << check->GetTargetClassRTI(); + AddError(StringPrintf("%s:%d in block %d with RTI %s has valid but inexact RTI.", + check->DebugName(), + check->GetId(), + check->GetBlock()->GetBlockId(), + ssRTI.str().c_str())); + } + HInstruction* input = check->InputAt(1); if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { if (!input->IsNullConstant()) { @@ -674,13 +835,14 @@ void GraphChecker::HandleLoop(HBasicBlock* loop_header) { loop_information->GetPreHeader()->GetSuccessors().size())); } - if (loop_information->GetSuspendCheck() == nullptr) { - AddError(StringPrintf( - "Loop with header %d does not have a suspend check.", - loop_header->GetBlockId())); + if (!GetGraph()->SuspendChecksAreAllowedToNoOp() && + loop_information->GetSuspendCheck() == nullptr) { + AddError(StringPrintf("Loop with header %d does not have a suspend check.", + loop_header->GetBlockId())); } - if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) { + if (!GetGraph()->SuspendChecksAreAllowedToNoOp() && + loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) { AddError(StringPrintf( "Loop header %d does not have the loop suspend check as the first instruction.", loop_header->GetBlockId())); @@ -1051,6 +1213,21 @@ void GraphChecker::VisitNeg(HNeg* instruction) { } } +void GraphChecker::VisitArraySet(HArraySet* instruction) { + VisitInstruction(instruction); + + if (instruction->NeedsTypeCheck() != + instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) { + AddError( + StringPrintf("%s %d has a flag mismatch. An ArraySet instruction can trigger a GC iff it " + "needs a type check. 
Needs type check: %s, Can trigger GC: %s", + instruction->DebugName(), + instruction->GetId(), + StrBool(instruction->NeedsTypeCheck()), + StrBool(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())))); + } +} + void GraphChecker::VisitBinaryOperation(HBinaryOperation* op) { VisitInstruction(op); DataType::Type lhs_type = op->InputAt(0)->GetType(); @@ -1111,6 +1288,8 @@ void GraphChecker::VisitBinaryOperation(HBinaryOperation* op) { } void GraphChecker::VisitConstant(HConstant* instruction) { + VisitInstruction(instruction); + HBasicBlock* block = instruction->GetBlock(); if (!block->IsEntryBlock()) { AddError(StringPrintf( @@ -1149,6 +1328,18 @@ void GraphChecker::VisitTypeConversion(HTypeConversion* instruction) { void GraphChecker::VisitVecOperation(HVecOperation* instruction) { VisitInstruction(instruction); + + if (!GetGraph()->HasSIMD()) { + AddError( + StringPrintf("The graph doesn't have the HasSIMD flag set but we saw " + "%s:%d in block %d.", + instruction->DebugName(), + instruction->GetId(), + instruction->GetBlock()->GetBlockId())); + } + + flag_info_.seen_SIMD = true; + if (codegen_ == nullptr) { return; } diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 04c8d2103c..d6644f3b50 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -21,10 +21,11 @@ #include "base/arena_bit_vector.h" #include "base/bit_vector-inl.h" +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; @@ -54,6 +55,7 @@ class GraphChecker : public HGraphDelegateVisitor { void VisitInstruction(HInstruction* instruction) override; void VisitPhi(HPhi* phi) override; + void VisitArraySet(HArraySet* instruction) override; void VisitBinaryOperation(HBinaryOperation* op) override; void VisitBooleanNot(HBooleanNot* instruction) override; void VisitBoundType(HBoundType* instruction) override; @@ -64,8 +66,11 @@ class GraphChecker : public HGraphDelegateVisitor { void VisitDeoptimize(HDeoptimize* instruction) override; void VisitIf(HIf* instruction) override; void VisitInstanceOf(HInstanceOf* check) override; + void VisitInvoke(HInvoke* invoke) override; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override; + void VisitLoadClass(HLoadClass* load) override; void VisitLoadException(HLoadException* load) override; + void VisitMonitorOperation(HMonitorOperation* monitor_operation) override; void VisitNeg(HNeg* instruction) override; void VisitPackedSwitch(HPackedSwitch* instruction) override; void VisitReturn(HReturn* ret) override; @@ -102,15 +107,6 @@ class GraphChecker : public HGraphDelegateVisitor { } } - // Enable/Disable the reference type info check. - // - // Return: the previous status of the check. - bool SetRefTypeInfoCheckEnabled(bool value = true) { - bool old_value = check_reference_type_info_; - check_reference_type_info_ = value; - return old_value; - } - protected: // Report a new error. void AddError(const std::string& error) { @@ -123,18 +119,30 @@ class GraphChecker : public HGraphDelegateVisitor { ArenaVector<std::string> errors_; private: + void VisitReversePostOrder(); + + // Checks that the graph's flags are set correctly. + void CheckGraphFlags(); + // String displayed before dumped errors. 
const char* const dump_prefix_; ScopedArenaAllocator allocator_; ArenaBitVector seen_ids_; - // Whether to perform the reference type info check for instructions which use or produce - // object references, e.g. HNewInstance, HLoadClass. - // The default value is true. - bool check_reference_type_info_ = true; // Used to access target information. CodeGenerator* codegen_; + struct FlagInfo { + bool seen_try_boundary = false; + bool seen_monitor_operation = false; + bool seen_loop = false; + bool seen_irreducible_loop = false; + bool seen_SIMD = false; + bool seen_bounds_checks = false; + bool seen_always_throwing_invokes = false; + }; + FlagInfo flag_info_; + DISALLOW_COPY_AND_ASSIGN(GraphChecker); }; diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc index 08bfa5d80f..b256fbb46d 100644 --- a/compiler/optimizing/graph_checker_test.cc +++ b/compiler/optimizing/graph_checker_test.cc @@ -14,12 +14,13 @@ * limitations under the License. */ +#include "base/macros.h" #include "graph_checker.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { -class GraphCheckerTest : public OptimizingUnitTest { +class GraphCheckerTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: HGraph* CreateSimpleCFG(); void TestCode(const std::vector<uint16_t>& data); diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index 29af808731..b5d712736f 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -22,7 +23,7 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { class GraphTest : public OptimizingUnitTest { protected: diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 4a6ee13005..73bdd1e223 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -43,7 +43,7 @@ #include "ssa_liveness_analysis.h" #include "utils/assembler.h" -namespace art { +namespace art HIDDEN { // Unique pass-name to identify that the dump is for printing to log. 
constexpr const char* kDebugDumpName = "debug"; @@ -480,12 +480,20 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << array_set->GetValueCanBeNull() << std::noboolalpha; StartAttributeStream("needs_type_check") << std::boolalpha << array_set->NeedsTypeCheck() << std::noboolalpha; + StartAttributeStream("can_trigger_gc") + << std::boolalpha << array_set->GetSideEffects().Includes(SideEffects::CanTriggerGC()) + << std::noboolalpha; + StartAttributeStream("write_barrier_kind") << array_set->GetWriteBarrierKind(); } void VisitCompare(HCompare* compare) override { StartAttributeStream("bias") << compare->GetBias(); } + void VisitCondition(HCondition* condition) override { + StartAttributeStream("bias") << condition->GetBias(); + } + void VisitInvoke(HInvoke* invoke) override { StartAttributeStream("dex_file_index") << invoke->GetMethodReference().index; ArtMethod* method = invoke->GetResolvedMethod(); @@ -549,7 +557,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { iset->GetFieldInfo().GetDexFile().PrettyField(iset->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << iset->GetFieldType(); - StartAttributeStream("predicated") << std::boolalpha << iset->GetIsPredicatedSet(); + StartAttributeStream("predicated") + << std::boolalpha << iset->GetIsPredicatedSet() << std::noboolalpha; + StartAttributeStream("write_barrier_kind") << iset->GetWriteBarrierKind(); } void VisitStaticFieldGet(HStaticFieldGet* sget) override { @@ -564,6 +574,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { sset->GetFieldInfo().GetDexFile().PrettyField(sset->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << sset->GetFieldType(); + StartAttributeStream("write_barrier_kind") << sset->GetWriteBarrierKind(); } void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) override { @@ -757,15 +768,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { instruction->IsCheckCast()) { StartAttributeStream("klass") << "unresolved"; } else { - // The NullConstant may be added to the graph during other passes that happen between - // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner - // doesn't run or doesn't inline anything, the NullConstant remains untyped. - // So we should check NullConstants for validity only after reference type propagation. - DCHECK(graph_in_bad_state_ || - IsDebugDump() || - (!is_after_pass_ && IsPass(HGraphBuilder::kBuilderPassName))) - << instruction->DebugName() << instruction->GetId() << " has invalid rti " - << (is_after_pass_ ? 
"after" : "before") << " pass " << pass_name_; + StartAttributeStream("klass") << "invalid"; } } if (disasm_info_ != nullptr) { @@ -904,6 +907,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { if (block->IsCatchBlock()) { PrintProperty("flags", "catch_block"); + } else if (block->IsTryBlock()) { + std::stringstream flags_properties; + flags_properties << "try_start " + << namer_.GetName(block->GetTryCatchInformation()->GetTryEntry().GetBlock()); + PrintProperty("flags", flags_properties.str().c_str()); } else if (!IsDebugDump()) { // Don't print useless information to logcat PrintEmptyProperty("flags"); diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h index 3429c11cbd..9878917739 100644 --- a/compiler/optimizing/graph_visualizer.h +++ b/compiler/optimizing/graph_visualizer.h @@ -22,10 +22,11 @@ #include "arch/instruction_set.h" #include "base/arena_containers.h" +#include "base/macros.h" #include "base/value_object.h" #include "block_namer.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class DexCompilationUnit; diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index c7cd223b51..a6ca057cfc 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -23,7 +23,7 @@ #include "base/utils.h" #include "side_effects_analysis.h" -namespace art { +namespace art HIDDEN { /** * A ValueSet holds instructions that can replace other instructions. It is updated diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h index bbf2265e98..df4e3a8dbf 100644 --- a/compiler/optimizing/gvn.h +++ b/compiler/optimizing/gvn.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_GVN_H_ #define ART_COMPILER_OPTIMIZING_GVN_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class SideEffectsAnalysis; diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index 3bf4cc35ba..1eb6307cb1 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -17,12 +17,13 @@ #include "gvn.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "side_effects_analysis.h" -namespace art { +namespace art HIDDEN { class GVNTest : public OptimizingUnitTest {}; diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 3b5a2f1f9d..be6c268f5d 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -16,9 +16,10 @@ #include "induction_var_analysis.h" +#include "base/scoped_arena_containers.h" #include "induction_var_range.h" -namespace art { +namespace art HIDDEN { /** * Returns true if the from/to types denote a narrowing, integral conversion (precision loss). 
@@ -214,18 +215,25 @@ struct HInductionVarAnalysis::StackEntry { size_t low_depth; }; -HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph, const char* name) - : HOptimization(graph, name), +HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph, + OptimizingCompilerStats* stats, + const char* name) + : HOptimization(graph, name, stats), induction_(std::less<const HLoopInformation*>(), graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)), - cycles_(std::less<HPhi*>(), - graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)) { + cycles_(std::less<HPhi*>(), graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)) { } bool HInductionVarAnalysis::Run() { // Detects sequence variables (generalized induction variables) during an outer to inner // traversal of all loops using Gerlek's algorithm. The order is important to enable // range analysis on outer loop while visiting inner loops. + + if (IsPathologicalCase()) { + MaybeRecordStat(stats_, MethodCompilationStat::kNotVarAnalyzedPathological); + return false; + } + for (HBasicBlock* graph_block : graph_->GetReversePostOrder()) { // Don't analyze irreducible loops. if (graph_block->IsLoopHeader() && !graph_block->GetLoopInformation()->IsIrreducible()) { @@ -1576,4 +1584,84 @@ std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) { return ""; } +void HInductionVarAnalysis::CalculateLoopHeaderPhisInARow( + HPhi* initial_phi, + ScopedArenaSafeMap<HPhi*, int>& cached_values, + ScopedArenaAllocator& allocator) { + DCHECK(initial_phi->IsLoopHeaderPhi()); + ScopedArenaQueue<HPhi*> worklist(allocator.Adapter(kArenaAllocInductionVarAnalysis)); + worklist.push(initial_phi); + // Used to check which phis are in the current chain we are checking. + ScopedArenaSet<HPhi*> phis_in_chain(allocator.Adapter(kArenaAllocInductionVarAnalysis)); + while (!worklist.empty()) { + HPhi* current_phi = worklist.front(); + DCHECK(current_phi->IsLoopHeaderPhi()); + if (cached_values.find(current_phi) != cached_values.end()) { + // Already processed. + worklist.pop(); + continue; + } + + phis_in_chain.insert(current_phi); + int max_value = 0; + bool pushed_other_phis = false; + for (size_t index = 0; index < current_phi->InputCount(); index++) { + // If the input is not a loop header phi, we only have 1 (current_phi). + int current_value = 1; + if (current_phi->InputAt(index)->IsLoopHeaderPhi()) { + HPhi* loop_header_phi = current_phi->InputAt(index)->AsPhi(); + auto it = cached_values.find(loop_header_phi); + if (it != cached_values.end()) { + current_value += it->second; + } else if (phis_in_chain.find(current_phi) == phis_in_chain.end()) { + // Push phis which aren't in the chain already to be processed. + pushed_other_phis = true; + worklist.push(loop_header_phi); + } + // Phis in the chain will get processed later. We keep `current_value` as 1 to avoid + // double counting `loop_header_phi`. + } + max_value = std::max(max_value, current_value); + } + + if (!pushed_other_phis) { + // Only finish processing after all inputs were processed. 
+      worklist.pop();
+      phis_in_chain.erase(current_phi);
+      cached_values.FindOrAdd(current_phi, max_value);
+    }
+  }
+}
+
+bool HInductionVarAnalysis::IsPathologicalCase() {
+  ScopedArenaAllocator local_allocator(graph_->GetArenaStack());
+  ScopedArenaSafeMap<HPhi*, int> cached_values(
+      std::less<HPhi*>(), local_allocator.Adapter(kArenaAllocInductionVarAnalysis));
+
+  // Due to how our induction passes work, we will take a lot of time compiling if we have several
+  // loop header phis in a row. If we have more than 15 different loop header phis in a row, we
+  // don't perform the analysis.
+  constexpr int kMaximumLoopHeaderPhisInARow = 15;
+
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    if (!block->IsLoopHeader()) {
+      continue;
+    }
+
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      DCHECK(it.Current()->IsLoopHeaderPhi());
+      HPhi* phi = it.Current()->AsPhi();
+      CalculateLoopHeaderPhisInARow(phi, cached_values, local_allocator);
+      DCHECK(cached_values.find(phi) != cached_values.end())
+          << " we should have a value for Phi " << phi->GetId()
+          << " in block " << phi->GetBlock()->GetBlockId();
+      if (cached_values.find(phi)->second > kMaximumLoopHeaderPhisInARow) {
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h index 09417722da..050950089a 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -21,11 +21,12 @@
 #include "base/arena_containers.h"
 #include "base/array_ref.h"
+#include "base/macros.h"
 #include "base/scoped_arena_containers.h"
 #include "nodes.h"
 #include "optimization.h"

-namespace art {
+namespace art HIDDEN {

 /**
  * Induction variable analysis. This class does not have a direct public API.
@@ -38,7 +39,9 @@ namespace art {
  */
 class HInductionVarAnalysis : public HOptimization {
  public:
-  explicit HInductionVarAnalysis(HGraph* graph, const char* name = kInductionPassName);
+  explicit HInductionVarAnalysis(HGraph* graph,
+                                 OptimizingCompilerStats* stats = nullptr,
+                                 const char* name = kInductionPassName);

   bool Run() override;

@@ -307,6 +310,15 @@ class HInductionVarAnalysis : public HOptimization {
   static std::string FetchToString(HInstruction* fetch);
   static std::string InductionToString(InductionInfo* info);

+  // Returns true if we have a pathological case we don't want to analyze.
+  bool IsPathologicalCase();
+  // Starting with initial_phi, it calculates how many loop header phis in a row we have. To do
+  // this, we count the loop header phis which are used as inputs of other loop header phis. It
+  // uses `cached_values` to avoid recomputing results.
+  void CalculateLoopHeaderPhisInARow(HPhi* initial_phi,
+                                     ScopedArenaSafeMap<HPhi*, int>& cached_values,
+                                     ScopedArenaAllocator& allocator);
+
   /**
    * Maintains the results of the analysis as a mapping from loops to a mapping from instructions
    * to the induction information for that instruction in that loop.
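For illustration only (not part of the patch): IsPathologicalCase, added in the induction_var_analysis.cc hunk above, skips induction analysis when more than 15 loop header phis feed into each other in a row, and CalculateLoopHeaderPhisInARow memoizes a per-phi chain length in `cached_values`. The simplified standalone sketch below shows the same counting-with-memoization idea on a toy phi graph; the types are hypothetical, it uses recursion where the real pass uses an explicit worklist, and it only approximates how cycles among loop header phis are handled.

// Simplified, hypothetical model of counting chained loop header phis.
#include <algorithm>
#include <cstdio>
#include <map>
#include <set>
#include <vector>

struct Phi {
  int id = 0;
  std::vector<Phi*> inputs;  // Only loop-header-phi inputs matter for the count.
};

// Longest run of phis ending at `phi`, memoized in `cache`. Phis currently on the DFS stack
// (reached through a cycle) contribute 1, loosely mirroring how the patch treats phis already
// in `phis_in_chain`.
int PhisInARow(Phi* phi, std::map<Phi*, int>& cache, std::set<Phi*>& on_stack) {
  auto it = cache.find(phi);
  if (it != cache.end()) return it->second;
  on_stack.insert(phi);
  int max_value = 1;
  for (Phi* input : phi->inputs) {
    int current = 1;
    if (on_stack.count(input) == 0) {
      current += PhisInARow(input, cache, on_stack);
    }
    max_value = std::max(max_value, current);
  }
  on_stack.erase(phi);
  cache[phi] = max_value;
  return max_value;
}

int main() {
  // A chain p0 <- p1 <- p2 <- p3 (each phi uses the previous one as an input).
  std::vector<Phi> phis(4);
  for (int i = 0; i < 4; ++i) phis[i].id = i;
  for (int i = 1; i < 4; ++i) phis[i].inputs.push_back(&phis[i - 1]);

  std::map<Phi*, int> cache;
  std::set<Phi*> on_stack;
  constexpr int kLimit = 15;  // Same cutoff as the patch.
  int longest = 0;
  for (Phi& phi : phis) longest = std::max(longest, PhisInARow(&phi, cache, on_stack));
  std::printf("longest chain = %d, pathological = %s\n",
              longest, longest > kLimit ? "yes" : "no");
  return 0;
}

When the limit is exceeded, the patch records kNotVarAnalyzedPathological and returns from Run() without analyzing, trading optimization opportunity for bounded compile time.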
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc index 4c11ad4643..80c15371dc 100644 --- a/compiler/optimizing/induction_var_analysis_test.cc +++ b/compiler/optimizing/induction_var_analysis_test.cc @@ -17,12 +17,13 @@ #include <regex> #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "induction_var_analysis.h" #include "nodes.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { /** * Fixture class for the InductionVarAnalysis tests. diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index ad3d1a9321..9b78699ead 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -17,8 +17,9 @@ #include "induction_var_range.h" #include <limits> +#include "optimizing/nodes.h" -namespace art { +namespace art HIDDEN { /** Returns true if 64-bit constant fits in 32-bit constant. */ static bool CanLongValueFitIntoInt(int64_t c) { @@ -1064,10 +1065,13 @@ bool InductionVarRange::GenerateRangeOrLastValue(const HBasicBlock* context, case HInductionVarAnalysis::kLinear: if (*stride_value > 0) { lower = nullptr; + return GenerateLastValueLinear( + context, loop, info, trip, graph, block, /*is_min=*/false, upper); } else { upper = nullptr; + return GenerateLastValueLinear( + context, loop, info, trip, graph, block, /*is_min=*/true, lower); } - break; case HInductionVarAnalysis::kPolynomial: return GenerateLastValuePolynomial(context, loop, info, trip, graph, block, lower); case HInductionVarAnalysis::kGeometric: @@ -1113,6 +1117,54 @@ bool InductionVarRange::GenerateRangeOrLastValue(const HBasicBlock* context, GenerateCode(context, loop, info, trip, graph, block, /*is_min=*/ false, upper); } +bool InductionVarRange::GenerateLastValueLinear(const HBasicBlock* context, + const HLoopInformation* loop, + HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + HGraph* graph, + HBasicBlock* block, + bool is_min, + /*out*/ HInstruction** result) const { + DataType::Type type = info->type; + // Avoid any narrowing linear induction or any type mismatch between the linear induction and the + // trip count expression. + if (HInductionVarAnalysis::IsNarrowingLinear(info) || trip->type != type) { + return false; + } + + // Stride value must be a known constant that fits into int32. + int64_t stride_value = 0; + if (!IsConstant(context, loop, info->op_a, kExact, &stride_value) || + !CanLongValueFitIntoInt(stride_value)) { + return false; + } + + // We require `a` to be a constant value that didn't overflow. + const bool is_min_a = stride_value >= 0 ? 
is_min : !is_min; + Value val_a = GetVal(context, loop, trip, trip, is_min_a); + HInstruction* opb; + if (!IsConstantValue(val_a) || + !GenerateCode(context, loop, info->op_b, trip, graph, block, is_min, &opb)) { + return false; + } + + if (graph != nullptr) { + ArenaAllocator* allocator = graph->GetAllocator(); + HInstruction* oper; + HInstruction* opa = graph->GetConstant(type, val_a.b_constant); + if (stride_value == 1) { + oper = new (allocator) HAdd(type, opa, opb); + } else if (stride_value == -1) { + oper = new (graph->GetAllocator()) HSub(type, opb, opa); + } else { + HInstruction* mul = new (allocator) HMul(type, graph->GetConstant(type, stride_value), opa); + oper = new (allocator) HAdd(type, Insert(block, mul), opb); + } + *result = Insert(block, oper); + } + return true; +} + bool InductionVarRange::GenerateLastValuePolynomial(const HBasicBlock* context, const HLoopInformation* loop, HInductionVarAnalysis::InductionInfo* info, diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 552837c044..3e1212bec8 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_INDUCTION_VAR_RANGE_H_ #define ART_COMPILER_OPTIMIZING_INDUCTION_VAR_RANGE_H_ +#include "base/macros.h" #include "induction_var_analysis.h" -namespace art { +namespace art HIDDEN { /** * This class implements range analysis on expressions within loops. It takes the results @@ -317,6 +318,15 @@ class InductionVarRange { /*out*/ bool* needs_finite_test, /*out*/ bool* needs_taken_test) const; + bool GenerateLastValueLinear(const HBasicBlock* context, + const HLoopInformation* loop, + HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + HGraph* graph, + HBasicBlock* block, + bool is_min, + /*out*/ HInstruction** result) const; + bool GenerateLastValuePolynomial(const HBasicBlock* context, const HLoopInformation* loop, HInductionVarAnalysis::InductionInfo* info, diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index 962123d948..d879897959 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -17,12 +17,13 @@ #include "induction_var_range.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "induction_var_analysis.h" #include "nodes.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { using Value = InductionVarRange::Value; @@ -1064,10 +1065,6 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) { HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_); ASSERT_TRUE(last->IsSub()); ExpectInt(1000, last->InputAt(0)); - ASSERT_TRUE(last->InputAt(1)->IsNeg()); - last = last->InputAt(1)->InputAt(0); - ASSERT_TRUE(last->IsSub()); - ExpectInt(0, last->InputAt(0)); ExpectInt(1000, last->InputAt(1)); // Loop logic. diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index f73c0d38e4..5a4478dc14 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -46,7 +46,7 @@ #include "thread.h" #include "verifier/verifier_compiler_binding.h" -namespace art { +namespace art HIDDEN { // Instruction limit to control memory. 
static constexpr size_t kMaximumNumberOfTotalInstructions = 1024; @@ -72,6 +72,9 @@ static constexpr size_t kMaximumNumberOfPolymorphicRecursiveCalls = 0; // Controls the use of inline caches in AOT mode. static constexpr bool kUseAOTInlineCaches = true; +// Controls the use of inlining try catches. +static constexpr bool kInlineTryCatches = true; + // We check for line numbers to make sure the DepthString implementation // aligns the output nicely. #define LOG_INTERNAL(msg) \ @@ -141,7 +144,11 @@ bool HInliner::Run() { } bool did_inline = false; - bool did_set_always_throws = false; + // The inliner is the only phase that sets invokes as `always throwing`, and since we only run the + // inliner once per graph this value should always be false at the beginning of the inlining + // phase. This is important since we use `HasAlwaysThrowingInvokes` to know whether the inliner + // phase performed a relevant change in the graph. + DCHECK(!graph_->HasAlwaysThrowingInvokes()); // Initialize the number of instructions for the method being compiled. Recursive calls // to HInliner::Run have already updated the instruction count. @@ -175,14 +182,14 @@ bool HInliner::Run() { HInstruction* next = instruction->GetNext(); HInvoke* call = instruction->AsInvoke(); // As long as the call is not intrinsified, it is worth trying to inline. - if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { + if (call != nullptr && !codegen_->IsImplementedIntrinsic(call)) { if (honor_noinline_directives) { // Debugging case: directives in method names control or assert on inlining. std::string callee_name = call->GetMethodReference().PrettyMethod(/* with_signature= */ false); // Tests prevent inlining by having $noinline$ in their method names. if (callee_name.find("$noinline$") == std::string::npos) { - if (TryInline(call, &did_set_always_throws)) { + if (TryInline(call)) { did_inline = true; } else if (honor_inline_directives) { bool should_have_inlined = (callee_name.find("$inline$") != std::string::npos); @@ -192,7 +199,7 @@ bool HInliner::Run() { } else { DCHECK(!honor_inline_directives); // Normal case: try to inline. - if (TryInline(call, &did_set_always_throws)) { + if (TryInline(call)) { did_inline = true; } } @@ -201,7 +208,9 @@ bool HInliner::Run() { } } - return did_inline || did_set_always_throws; + // We return true if we either inlined at least one method, or we marked one of our methods as + // always throwing. 
+ return did_inline || graph_->HasAlwaysThrowingInvokes(); } static bool IsMethodOrDeclaringClassFinal(ArtMethod* method) @@ -436,7 +445,7 @@ static bool AlwaysThrows(ArtMethod* method) return throw_seen; } -bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_always_throws) { +bool HInliner::TryInline(HInvoke* invoke_instruction) { MaybeRecordStat(stats_, MethodCompilationStat::kTryInline); // Don't bother to move further if we know the method is unresolved or the invocation is @@ -472,7 +481,8 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_al bool result = TryInlineAndReplace(invoke_instruction, actual_method, ReferenceTypeInfo::CreateInvalid(), - /* do_rtp= */ true); + /* do_rtp= */ true, + /* is_speculative= */ false); if (result) { MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface); if (outermost_graph_ == graph_) { @@ -487,11 +497,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_al } else { invoke_to_analyze = invoke_instruction; } - // Set always throws property for non-inlined method call with single - // target. - if (AlwaysThrows(actual_method)) { - invoke_to_analyze->SetAlwaysThrows(true); - *did_set_always_throws = true; + // Set always throws property for non-inlined method call with single target. + if (invoke_instruction->AlwaysThrows() || AlwaysThrows(actual_method)) { + invoke_to_analyze->SetAlwaysThrows(/* always_throws= */ true); + graph_->SetHasAlwaysThrowingInvokes(/* value= */ true); } } return result; @@ -499,10 +508,27 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_al DCHECK(!invoke_instruction->IsInvokeStaticOrDirect()); + // No try catch inlining allowed here, or recursively. For try catch inlining we are banking on + // the fact that we have a unique dex pc list. We cannot guarantee that for some TryInline methods + // e.g. `TryInlinePolymorphicCall`. + // TODO(solanes): Setting `try_catch_inlining_allowed_` to false here covers all cases from + // `TryInlineFromCHA` and from `TryInlineFromInlineCache` as well (e.g. + // `TryInlinePolymorphicCall`). Reassess to see if we can inline try catch blocks in + // `TryInlineFromCHA`, `TryInlineMonomorphicCall` and `TryInlinePolymorphicCallToSameTarget`. + + // We store the value to restore it since we will use the same HInliner instance for other inlinee + // candidates.
+ const bool previous_value = try_catch_inlining_allowed_; + try_catch_inlining_allowed_ = false; + if (TryInlineFromCHA(invoke_instruction)) { + try_catch_inlining_allowed_ = previous_value; return true; } - return TryInlineFromInlineCache(invoke_instruction); + + const bool result = TryInlineFromInlineCache(invoke_instruction); + try_catch_inlining_allowed_ = previous_value; + return result; } bool HInliner::TryInlineFromCHA(HInvoke* invoke_instruction) { @@ -518,7 +544,8 @@ bool HInliner::TryInlineFromCHA(HInvoke* invoke_instruction) { if (!TryInlineAndReplace(invoke_instruction, method, ReferenceTypeInfo::CreateInvalid(), - /* do_rtp= */ true)) { + /* do_rtp= */ true, + /* is_speculative= */ true)) { return false; } AddCHAGuard(invoke_instruction, dex_pc, cursor, bb_cursor); @@ -786,7 +813,8 @@ bool HInliner::TryInlineMonomorphicCall( if (!TryInlineAndReplace(invoke_instruction, resolved_method, ReferenceTypeInfo::Create(monomorphic_type, /* is_exact= */ true), - /* do_rtp= */ false)) { + /* do_rtp= */ false, + /* is_speculative= */ true)) { return false; } @@ -802,7 +830,6 @@ bool HInliner::TryInlineMonomorphicCall( // Run type propagation to get the guard typed, and eventually propagate the // type of the receiver. ReferenceTypePropagation rtp_fixup(graph_, - outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), /* is_first_run= */ false); rtp_fixup.Run(); @@ -982,7 +1009,8 @@ bool HInliner::TryInlinePolymorphicCall( !TryBuildAndInline(invoke_instruction, method, ReferenceTypeInfo::Create(handle, /* is_exact= */ true), - &return_replacement)) { + &return_replacement, + /* is_speculative= */ true)) { all_targets_inlined = false; } else { one_target_inlined = true; @@ -1024,7 +1052,6 @@ bool HInliner::TryInlinePolymorphicCall( // Run type propagation to get the guards typed. ReferenceTypePropagation rtp_fixup(graph_, - outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), /* is_first_run= */ false); rtp_fixup.Run(); @@ -1160,7 +1187,8 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( if (!TryBuildAndInline(invoke_instruction, actual_method, ReferenceTypeInfo::CreateInvalid(), - &return_replacement)) { + &return_replacement, + /* is_speculative= */ true)) { return false; } @@ -1215,7 +1243,6 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( // Run type propagation to get the guard typed. ReferenceTypePropagation rtp_fixup(graph_, - outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), /* is_first_run= */ false); rtp_fixup.Run(); @@ -1232,7 +1259,6 @@ void HInliner::MaybeRunReferenceTypePropagation(HInstruction* replacement, // Actual return value has a more specific type than the method's declared // return type. Run RTP again on the outer graph to propagate it. ReferenceTypePropagation(graph_, - outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), /* is_first_run= */ false).Run(); } @@ -1246,6 +1272,13 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction, return false; } + // Don't try to devirtualize intrinsics as it breaks pattern matching from later phases. + // TODO(solanes): This `if` could be removed if we update optimizations like + // TryReplaceStringBuilderAppend. + if (invoke_instruction->IsIntrinsic()) { + return false; + } + // Don't bother trying to call directly a default conflict method. It // doesn't have a proper MethodReference, but also `GetCanonicalMethod` // will return an actual default implementation. 
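The previous_value save/restore around TryInlineFromCHA and TryInlineFromInlineCache above is the kind of pattern often wrapped in a small RAII guard. A sketch of such a helper, purely illustrative and not part of this patch:

  // Illustrative only: clears a bool member for the duration of a scope and
  // restores the saved value on every exit path, including early returns.
  class ScopedFlagOverride {
   public:
    ScopedFlagOverride(bool* flag, bool value) : flag_(flag), saved_(*flag) { *flag_ = value; }
    ~ScopedFlagOverride() { *flag_ = saved_; }

   private:
    bool* const flag_;
    const bool saved_;
  };

The manual version in the hunk is equivalent here because both return paths restore the flag explicitly.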
@@ -1288,7 +1321,8 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction, dispatch_info, kDirect, MethodReference(method->GetDexFile(), method->GetDexMethodIndex()), - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInputsRef inputs = invoke_instruction->GetInputs(); DCHECK_EQ(inputs.size(), invoke_instruction->GetNumberOfArguments()); for (size_t index = 0; index != inputs.size(); ++index) { @@ -1301,7 +1335,7 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction, invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction); new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); if (invoke_instruction->GetType() == DataType::Type::kReference) { - new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo()); + new_invoke->SetReferenceTypeInfoIfValid(invoke_instruction->GetReferenceTypeInfo()); } *replacement = new_invoke; @@ -1316,11 +1350,13 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction, bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* method, ReferenceTypeInfo receiver_type, - bool do_rtp) { - DCHECK(!invoke_instruction->IsIntrinsic()); + bool do_rtp, + bool is_speculative) { + DCHECK(!codegen_->IsImplementedIntrinsic(invoke_instruction)); HInstruction* return_replacement = nullptr; - if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) { + if (!TryBuildAndInline( + invoke_instruction, method, receiver_type, &return_replacement, is_speculative)) { return false; } @@ -1378,6 +1414,15 @@ bool HInliner::IsInliningAllowed(ArtMethod* method, const CodeItemDataAccessor& return false; } + if (annotations::MethodIsNeverInline(*method->GetDexFile(), + method->GetClassDef(), + method->GetDexMethodIndex())) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNeverInlineAnnotation) + << "Method " << method->PrettyMethod() + << " has the @NeverInline annotation so it won't be inlined"; + return false; + } + return true; } @@ -1397,9 +1442,25 @@ bool HInliner::IsInliningSupported(const HInvoke* invoke_instruction, } if (accessor.TriesSize() != 0) { - LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchCallee) - << "Method " << method->PrettyMethod() << " is not inlined because of try block"; - return false; + if (!kInlineTryCatches) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchDisabled) + << "Method " << method->PrettyMethod() + << " is not inlined because inlining try catches is disabled globally"; + return false; + } + const bool disallowed_try_catch_inlining = + // Direct parent is a try block. + invoke_instruction->GetBlock()->IsTryBlock() || + // Indirect parent disallows try catch inlining. + !try_catch_inlining_allowed_; + if (disallowed_try_catch_inlining) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchCallee) + << "Method " << method->PrettyMethod() + << " is not inlined because it has a try catch and we are not supporting it for this" + << " particular call. This could be because e.g. it would be inlined inside another" + << " try block, we arrived here from TryInlinePolymorphicCall, etc."; + return false; + } } if (invoke_instruction->IsInvokeStaticOrDirect() && @@ -1416,9 +1477,9 @@ bool HInliner::IsInliningSupported(const HInvoke* invoke_instruction, return true; } -// Returns whether our resource limits allow inlining this method.
-bool HInliner::IsInliningBudgetAvailable(ArtMethod* method, - const CodeItemDataAccessor& accessor) const { +bool HInliner::IsInliningEncouraged(const HInvoke* invoke_instruction, + ArtMethod* method, + const CodeItemDataAccessor& accessor) const { if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) { LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedRecursiveBudget) << "Method " @@ -1438,13 +1499,21 @@ bool HInliner::IsInliningBudgetAvailable(ArtMethod* method, return false; } + if (invoke_instruction->GetBlock()->GetLastInstruction()->IsThrow()) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedEndsWithThrow) + << "Method " << method->PrettyMethod() + << " is not inlined because its block ends with a throw"; + return false; + } + return true; } bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* method, ReferenceTypeInfo receiver_type, - HInstruction** return_replacement) { + HInstruction** return_replacement, + bool is_speculative) { // If invoke_instruction is devirtualized to a different method, give intrinsics // another chance before we try to inline it. if (invoke_instruction->GetResolvedMethod() != method && method->IsIntrinsic()) { @@ -1459,7 +1528,8 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, invoke_instruction->GetMethodReference(), // Use existing invoke's method's reference. method, MethodReference(method->GetDexFile(), method->GetDexMethodIndex()), - method->GetMethodIndex()); + method->GetMethodIndex(), + !graph_->IsDebuggable()); DCHECK_NE(new_invoke->GetIntrinsic(), Intrinsics::kNone); HInputsRef inputs = invoke_instruction->GetInputs(); for (size_t index = 0; index != inputs.size(); ++index) { @@ -1468,7 +1538,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction); new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); if (invoke_instruction->GetType() == DataType::Type::kReference) { - new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo()); + new_invoke->SetReferenceTypeInfoIfValid(invoke_instruction->GetReferenceTypeInfo()); } *return_replacement = new_invoke; return true; @@ -1503,12 +1573,12 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - if (!IsInliningBudgetAvailable(method, accessor)) { + if (!IsInliningEncouraged(invoke_instruction, method, accessor)) { return false; } if (!TryBuildAndInlineHelper( - invoke_instruction, method, receiver_type, return_replacement)) { + invoke_instruction, method, receiver_type, return_replacement, is_speculative)) { return false; } @@ -1627,7 +1697,7 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, bool needs_constructor_barrier = false; for (size_t i = 0; i != number_of_iputs; ++i) { HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, iput_args[i]); - if (!value->IsConstant() || !value->AsConstant()->IsZeroBitPattern()) { + if (!IsZeroBitPattern(value)) { uint16_t field_index = iput_field_indexes[i]; bool is_final; HInstanceFieldSet* iput = @@ -1684,7 +1754,6 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index, Handle<mirror::DexCache> dex_cache = graph_->GetHandleCache()->NewHandle(referrer->GetDexCache()); ReferenceTypePropagation rtp(graph_, - outer_compilation_unit_.GetClassLoader(), dex_cache, /* is_first_run= */ false); rtp.Visit(iget); @@ -1795,7 +1864,7 @@ void HInliner::SubstituteArguments(HGraph* callee_graph, run_rtp = 
true; current->SetReferenceTypeInfo(receiver_type); } else { - current->SetReferenceTypeInfo(argument->GetReferenceTypeInfo()); + current->SetReferenceTypeInfoIfValid(argument->GetReferenceTypeInfo()); } current->AsParameterValue()->SetCanBeNull(argument->CanBeNull()); } @@ -1807,7 +1876,6 @@ void HInliner::SubstituteArguments(HGraph* callee_graph, // are more specific than the declared ones, run RTP again on the inner graph. if (run_rtp || ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) { ReferenceTypePropagation(callee_graph, - outer_compilation_unit_.GetClassLoader(), dex_compilation_unit.GetDexCache(), /* is_first_run= */ false).Run(); } @@ -1821,8 +1889,9 @@ void HInliner::SubstituteArguments(HGraph* callee_graph, // If this function returns true, it will also set out_number_of_instructions to // the number of instructions in the inlined body. bool HInliner::CanInlineBody(const HGraph* callee_graph, - const HBasicBlock* target_block, - size_t* out_number_of_instructions) const { + HInvoke* invoke, + size_t* out_number_of_instructions, + bool is_speculative) const { ArtMethod* const resolved_method = callee_graph->GetArtMethod(); HBasicBlock* exit_block = callee_graph->GetExitBlock(); @@ -1835,15 +1904,30 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph, bool has_one_return = false; for (HBasicBlock* predecessor : exit_block->GetPredecessors()) { - if (predecessor->GetLastInstruction()->IsThrow()) { - if (target_block->IsTryBlock()) { - // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto. - LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchCaller) + const HInstruction* last_instruction = predecessor->GetLastInstruction(); + // On inlinees, we can have Return/ReturnVoid/Throw -> TryBoundary -> Exit. To check for the + // actual last instruction, we have to skip the TryBoundary instruction. + if (last_instruction->IsTryBoundary()) { + predecessor = predecessor->GetSinglePredecessor(); + last_instruction = predecessor->GetLastInstruction(); + + // If the last instruction chain is Return/ReturnVoid -> TryBoundary -> Exit we will have to + // split a critical edge in InlineInto and might recompute loop information, which is + // unsupported for irreducible loops. + if (!last_instruction->IsThrow() && graph_->HasIrreducibleLoops()) { + DCHECK(last_instruction->IsReturn() || last_instruction->IsReturnVoid()); + // TODO(ngeoffray): Support re-computing loop information to graphs with + // irreducible loops? + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoopCaller) << "Method " << resolved_method->PrettyMethod() - << " could not be inlined because one branch always throws and" - << " caller is in a try/catch block"; + << " could not be inlined because we will have to recompute the loop information and" + << " the caller has irreducible loops"; return false; - } else if (graph_->GetExitBlock() == nullptr) { + } + } + + if (last_instruction->IsThrow()) { + if (graph_->GetExitBlock() == nullptr) { // TODO(ngeoffray): Support adding HExit in the caller graph. LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedInfiniteLoop) << "Method " << resolved_method->PrettyMethod() @@ -1853,9 +1937,10 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph, } else if (graph_->HasIrreducibleLoops()) { // TODO(ngeoffray): Support re-computing loop information to graphs with // irreducible loops? 
- VLOG(compiler) << "Method " << resolved_method->PrettyMethod() - << " could not be inlined because one branch always throws and" - << " caller has irreducible loops"; + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoopCaller) + << "Method " << resolved_method->PrettyMethod() + << " could not be inlined because one branch always throws and" + << " the caller has irreducible loops"; return false; } } else { @@ -1864,6 +1949,15 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph, } if (!has_one_return) { + if (!is_speculative) { + // If we know that the method always throws with the particular parameters, set it as such. + // This is better than using the dex instructions as we have more information about this + // particular call. We don't mark speculative inlines (e.g. the ones from the inline cache) as + // always throwing since they might not throw when executed. + invoke->SetAlwaysThrows(/* always_throws= */ true); + graph_->SetHasAlwaysThrowingInvokes(/* value= */ true); + } + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedAlwaysThrows) << "Method " << resolved_method->PrettyMethod() << " could not be inlined because it always throws"; @@ -1882,7 +1976,7 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph, if (block->GetLoopInformation()->IsIrreducible()) { // Don't inline methods with irreducible loops, they could prevent some // optimizations to run. - LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoop) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoopCallee) << "Method " << resolved_method->PrettyMethod() << " could not be inlined because it contains an irreducible loop"; return false; @@ -1930,8 +2024,10 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph, if (current->IsUnresolvedStaticFieldGet() || current->IsUnresolvedInstanceFieldGet() || current->IsUnresolvedStaticFieldSet() || - current->IsUnresolvedInstanceFieldSet()) { - // Entrypoint for unresolved fields does not handle inlined frames. + current->IsUnresolvedInstanceFieldSet() || + current->IsInvokeUnresolved()) { + // Unresolved invokes / field accesses are expensive at runtime when decoding inlining info, + // so don't inline methods that have them. LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedUnresolvedEntrypoint) << "Method " << resolved_method->PrettyMethod() << " could not be inlined because it is using an unresolved" @@ -1964,7 +2060,8 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph, bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, ArtMethod* resolved_method, ReferenceTypeInfo receiver_type, - HInstruction** return_replacement) { + HInstruction** return_replacement, + bool is_speculative) { DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid())); const dex::CodeItem* code_item = resolved_method->GetCodeItem(); const DexFile& callee_dex_file = *resolved_method->GetDexFile(); @@ -2057,10 +2154,18 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, SubstituteArguments(callee_graph, invoke_instruction, receiver_type, dex_compilation_unit); - RunOptimizations(callee_graph, code_item, dex_compilation_unit); + const bool try_catch_inlining_allowed_for_recursive_inline = + // It was allowed previously. + try_catch_inlining_allowed_ && + // The current invoke is not a try block. 
+ !invoke_instruction->GetBlock()->IsTryBlock(); + RunOptimizations(callee_graph, + code_item, + dex_compilation_unit, + try_catch_inlining_allowed_for_recursive_inline); size_t number_of_instructions = 0; - if (!CanInlineBody(callee_graph, invoke_instruction->GetBlock(), &number_of_instructions)) { + if (!CanInlineBody(callee_graph, invoke_instruction, &number_of_instructions, is_speculative)) { return false; } @@ -2095,16 +2200,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, void HInliner::RunOptimizations(HGraph* callee_graph, const dex::CodeItem* code_item, - const DexCompilationUnit& dex_compilation_unit) { + const DexCompilationUnit& dex_compilation_unit, + bool try_catch_inlining_allowed_for_recursive_inline) { // Note: if the outermost_graph_ is being compiled OSR, we should not run any // optimization that could lead to a HDeoptimize. The following optimizations do not. HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner"); - HConstantFolding fold(callee_graph, "constant_folding$inliner"); + HConstantFolding fold(callee_graph, inline_stats_, "constant_folding$inliner"); InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_); HOptimization* optimizations[] = { - &simplify, &fold, + &simplify, &dce, }; @@ -2141,7 +2247,8 @@ void HInliner::RunOptimizations(HGraph* callee_graph, total_number_of_dex_registers_ + accessor.RegistersSize(), total_number_of_instructions_ + number_of_instructions, this, - depth_ + 1); + depth_ + 1, + try_catch_inlining_allowed_for_recursive_inline); inliner.Run(); } @@ -2155,6 +2262,10 @@ static bool IsReferenceTypeRefinement(ObjPtr<mirror::Class> declared_class, } ReferenceTypeInfo actual_rti = actual_obj->GetReferenceTypeInfo(); + if (!actual_rti.IsValid()) { + return false; + } + ObjPtr<mirror::Class> actual_class = actual_rti.GetTypeHandle().Get(); return (actual_rti.IsExact() && !declared_is_exact) || (declared_class != actual_class && declared_class->IsAssignableFrom(actual_class)); diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index a2c2085e00..af067dae73 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -17,13 +17,14 @@ #ifndef ART_COMPILER_OPTIMIZING_INLINER_H_ #define ART_COMPILER_OPTIMIZING_INLINER_H_ +#include "base/macros.h" #include "dex/dex_file_types.h" #include "dex/invoke_type.h" #include "jit/profiling_info.h" #include "optimization.h" #include "profile/profile_compilation_info.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class DexCompilationUnit; @@ -42,7 +43,8 @@ class HInliner : public HOptimization { size_t total_number_of_dex_registers, size_t total_number_of_instructions, HInliner* parent, - size_t depth = 0, + size_t depth, + bool try_catch_inlining_allowed, const char* name = kInlinerPassName) : HOptimization(outer_graph, name, stats), outermost_graph_(outermost_graph), @@ -54,6 +56,7 @@ class HInliner : public HOptimization { parent_(parent), depth_(depth), inlining_budget_(0), + try_catch_inlining_allowed_(try_catch_inlining_allowed), inline_stats_(nullptr) {} bool Run() override; @@ -70,9 +73,7 @@ class HInliner : public HOptimization { kInlineCacheMissingTypes = 5 }; - // We set `did_set_always_throws` as true if we analyzed `invoke_instruction` and it always - // throws. - bool TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_always_throws); + bool TryInline(HInvoke* invoke_instruction); // Try to inline `resolved_method` in place of `invoke_instruction`. 
`do_rtp` is whether // reference type propagation can run after the inlining. If the inlining is successful, this @@ -80,19 +81,22 @@ class HInliner : public HOptimization { bool TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* resolved_method, ReferenceTypeInfo receiver_type, - bool do_rtp) + bool do_rtp, + bool is_speculative) REQUIRES_SHARED(Locks::mutator_lock_); bool TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* resolved_method, ReferenceTypeInfo receiver_type, - HInstruction** return_replacement) + HInstruction** return_replacement, + bool is_speculative) REQUIRES_SHARED(Locks::mutator_lock_); bool TryBuildAndInlineHelper(HInvoke* invoke_instruction, ArtMethod* resolved_method, ReferenceTypeInfo receiver_type, - HInstruction** return_replacement) + HInstruction** return_replacement, + bool is_speculative) REQUIRES_SHARED(Locks::mutator_lock_); // Substitutes parameters in the callee graph with their values from the caller. @@ -105,8 +109,9 @@ class HInliner : public HOptimization { // Run simple optimizations on `callee_graph`. void RunOptimizations(HGraph* callee_graph, const dex::CodeItem* code_item, - const DexCompilationUnit& dex_compilation_unit) - REQUIRES_SHARED(Locks::mutator_lock_); + const DexCompilationUnit& dex_compilation_unit, + bool try_catch_inlining_allowed_for_recursive_inline) + REQUIRES_SHARED(Locks::mutator_lock_); // Try to recognize known simple patterns and replace invoke call with appropriate instructions. bool TryPatternSubstitution(HInvoke* invoke_instruction, @@ -129,12 +134,14 @@ class HInliner : public HOptimization { const CodeItemDataAccessor& accessor) const REQUIRES_SHARED(Locks::mutator_lock_); - // Returns whether the inlining budget allows inlining method. + // Returns whether inlining is encouraged. // // For example, this checks whether the function has grown too large and // inlining should be prevented. - bool IsInliningBudgetAvailable(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const - REQUIRES_SHARED(Locks::mutator_lock_); + bool IsInliningEncouraged(const HInvoke* invoke_instruction, + art::ArtMethod* method, + const CodeItemDataAccessor& accessor) const + REQUIRES_SHARED(Locks::mutator_lock_); // Inspects the body of a method (callee_graph) and returns whether it can be // inlined. @@ -142,8 +149,9 @@ class HInliner : public HOptimization { // This checks for instructions and constructs that we do not support // inlining, such as inlining a throw instruction into a try block. bool CanInlineBody(const HGraph* callee_graph, - const HBasicBlock* target_block, - size_t* out_number_of_instructions) const + HInvoke* invoke, + size_t* out_number_of_instructions, + bool is_speculative) const REQUIRES_SHARED(Locks::mutator_lock_); // Create a new HInstanceFieldGet. @@ -320,6 +328,9 @@ class HInliner : public HOptimization { // The budget left for inlining, in number of instructions. size_t inlining_budget_; + // States if we are allowing try catch inlining to occur at this particular instance of inlining. + bool try_catch_inlining_allowed_; + // Used to record stats about optimizations on the inlined graph. // If the inlining is successful, these stats are merged to the caller graph's stats. 
OptimizingCompilerStats* inline_stats_; diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index e0bdd0963c..fee9091145 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -42,7 +42,7 @@ #include "ssa_builder.h" #include "well_known_classes.h" -namespace art { +namespace art HIDDEN { namespace { @@ -343,6 +343,10 @@ static bool IsBlockPopulated(HBasicBlock* block) { // Suspend checks were inserted into loop headers during building of dominator tree. DCHECK(block->GetFirstInstruction()->IsSuspendCheck()); return block->GetFirstInstruction() != block->GetLastInstruction(); + } else if (block->IsCatchBlock()) { + // Nops were inserted into the beginning of catch blocks. + DCHECK(block->GetFirstInstruction()->IsNop()); + return block->GetFirstInstruction() != block->GetLastInstruction(); } else { return !block->GetInstructions().IsEmpty(); } @@ -387,6 +391,11 @@ bool HInstructionBuilder::Build() { // This is slightly odd because the loop header might not be empty (TryBoundary). // But we're still creating the environment with locals from the top of the block. InsertInstructionAtTop(suspend_check); + } else if (current_block_->IsCatchBlock()) { + // We add an environment emitting instruction at the beginning of each catch block, in order + // to support try catch inlining. + // This is slightly odd because the catch block might not be empty (TryBoundary). + InsertInstructionAtTop(new (allocator_) HNop(block_dex_pc, /* needs_environment= */ true)); } if (block_dex_pc == kNoDexPc || current_block_ != block_builder_->GetBlockAt(block_dex_pc)) { @@ -414,7 +423,7 @@ bool HInstructionBuilder::Build() { } if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) { - AppendInstruction(new (allocator_) HNativeDebugInfo(dex_pc)); + AppendInstruction(new (allocator_) HNop(dex_pc, /* needs_environment= */ true)); } // Note: There may be no Thread for gtests. @@ -460,6 +469,9 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) { current_block_ = graph_->GetEntryBlock(); InitializeBlockLocals(); InitializeParameters(); + if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) { + AppendInstruction(new (allocator_) HMethodEntryHook(0u)); + } AppendInstruction(new (allocator_) HGoto(0u)); // Fill the body. @@ -495,14 +507,21 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) { dispatch_info, invoke_type, target_method, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false); } // Add the return instruction. if (return_type_ == DataType::Type::kVoid) { + if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) { + AppendInstruction(new (allocator_) HMethodExitHook(graph_->GetNullConstant(), kNoDexPc)); + } AppendInstruction(new (allocator_) HReturnVoid()); } else { + if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) { + AppendInstruction(new (allocator_) HMethodExitHook(latest_result_, kNoDexPc)); + } AppendInstruction(new (allocator_) HReturn(latest_result_)); } @@ -972,11 +991,11 @@ static ArtMethod* ResolveMethod(uint16_t method_idx, *imt_or_vtable_index = resolved_method->GetVtableIndex(); } else if (*invoke_type == kInterface) { // For HInvokeInterface we need the IMT index. 
- *imt_or_vtable_index = ImTable::GetImtIndex(resolved_method); + *imt_or_vtable_index = resolved_method->GetImtIndex(); + DCHECK_EQ(*imt_or_vtable_index, ImTable::GetImtIndex(resolved_method)); } - *is_string_constructor = - resolved_method->IsConstructor() && resolved_method->GetDeclaringClass()->IsStringClass(); + *is_string_constructor = resolved_method->IsStringConstructor(); return resolved_method; } @@ -1041,7 +1060,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, dispatch_info, invoke_type, resolved_method_reference, - HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); + HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit, + !graph_->IsDebuggable()); return HandleStringInit(invoke, operands, shorty); } @@ -1054,7 +1074,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, } // Try to build an HIR replacement for the intrinsic. - if (UNLIKELY(resolved_method->IsIntrinsic())) { + if (UNLIKELY(resolved_method->IsIntrinsic()) && !graph_->IsDebuggable()) { // All intrinsics are in the primary boot image, so their class can always be referenced // and we do not need to rely on the implicit class initialization check. The class should // be initialized but we do not require that here. @@ -1105,7 +1125,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, dispatch_info, invoke_type, resolved_method_reference, - clinit_check_requirement); + clinit_check_requirement, + !graph_->IsDebuggable()); if (clinit_check != nullptr) { // Add the class initialization check as last input of `invoke`. DCHECK_EQ(clinit_check_requirement, HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit); @@ -1121,7 +1142,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, method_reference, resolved_method, resolved_method_reference, - /*vtable_index=*/ imt_or_vtable_index); + /*vtable_index=*/ imt_or_vtable_index, + !graph_->IsDebuggable()); } else { DCHECK_EQ(invoke_type, kInterface); if (kIsDebugBuild) { @@ -1142,7 +1164,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, resolved_method, resolved_method_reference, /*imt_index=*/ imt_or_vtable_index, - load_kind); + load_kind, + !graph_->IsDebuggable()); } return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false); } @@ -1341,12 +1364,14 @@ bool HInstructionBuilder::BuildInvokePolymorphic(uint32_t dex_pc, method_reference, resolved_method, resolved_method_reference, - proto_idx); + proto_idx, + !graph_->IsDebuggable()); if (!HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false)) { return false; } - if (invoke->GetIntrinsic() != Intrinsics::kMethodHandleInvoke && + if (invoke->GetIntrinsic() != Intrinsics::kNone && + invoke->GetIntrinsic() != Intrinsics::kMethodHandleInvoke && invoke->GetIntrinsic() != Intrinsics::kMethodHandleInvokeExact && VarHandleAccessorNeedsReturnTypeCheck(invoke, return_type)) { // Type check is needed because VarHandle intrinsics do not type check the retrieved reference. 
@@ -1379,7 +1404,8 @@ bool HInstructionBuilder::BuildInvokeCustom(uint32_t dex_pc, call_site_idx, return_type, dex_pc, - method_reference); + method_reference, + !graph_->IsDebuggable()); return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false); } diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 817fbaa9e8..3d65d8fb54 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_ #include "base/array_ref.h" +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "data_type.h" @@ -27,7 +28,7 @@ #include "handle.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class ArenaBitVector; class ArtField; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 789f07786c..0c2fd5de56 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -31,13 +31,13 @@ #include "sharpening.h" #include "string_builder_append.h" -namespace art { +namespace art HIDDEN { // Whether to run an exhaustive test of individual HInstructions cloning when each instruction // is replaced with its copy if it is clonable. static constexpr bool kTestInstructionClonerExhaustively = false; -class InstructionSimplifierVisitor : public HGraphDelegateVisitor { +class InstructionSimplifierVisitor final : public HGraphDelegateVisitor { public: InstructionSimplifierVisitor(HGraph* graph, CodeGenerator* codegen, @@ -970,7 +970,7 @@ void InstructionSimplifierVisitor::VisitPredicatedInstanceFieldGet( pred_get->GetFieldInfo().GetDexFile(), pred_get->GetDexPc()); if (pred_get->GetType() == DataType::Type::kReference) { - replace_with->SetReferenceTypeInfo(pred_get->GetReferenceTypeInfo()); + replace_with->SetReferenceTypeInfoIfValid(pred_get->GetReferenceTypeInfo()); } pred_get->GetBlock()->InsertInstructionBefore(replace_with, pred_get); pred_get->ReplaceWith(replace_with); @@ -1117,6 +1117,10 @@ void InstructionSimplifierVisitor::VisitIf(HIf* instruction) { } } +// TODO(solanes): This optimization should be in ConstantFolding since we are folding to a constant. +// However, we get code size regressions when we do that since we sometimes have a NullCheck between +// HArrayLength and IsNewArray, and said NullCheck is eliminated in InstructionSimplifier. If we run +// ConstantFolding and InstructionSimplifier in lockstep this wouldn't be an issue. void InstructionSimplifierVisitor::VisitArrayLength(HArrayLength* instruction) { HInstruction* input = instruction->InputAt(0); // If the array is a NewArray with constant size, replace the array length @@ -1142,13 +1146,13 @@ void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { if (value->IsArrayGet()) { if (value->AsArrayGet()->GetArray() == instruction->GetArray()) { // If the code is just swapping elements in the array, no need for a type check. 
- instruction->ClearNeedsTypeCheck(); + instruction->ClearTypeCheck(); return; } } if (value->IsNullConstant()) { - instruction->ClearNeedsTypeCheck(); + instruction->ClearTypeCheck(); return; } @@ -1160,13 +1164,13 @@ void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { } if (value_rti.IsValid() && array_rti.CanArrayHold(value_rti)) { - instruction->ClearNeedsTypeCheck(); + instruction->ClearTypeCheck(); return; } if (array_rti.IsObjectArray()) { if (array_rti.IsExact()) { - instruction->ClearNeedsTypeCheck(); + instruction->ClearTypeCheck(); return; } instruction->SetStaticTypeOfArrayIsObjectArray(); @@ -1860,13 +1864,16 @@ void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) { // Search HDiv having the specified dividend and divisor which is in the specified basic block. // Return nullptr if nothing has been found. -static HInstruction* FindDivWithInputsInBasicBlock(HInstruction* dividend, - HInstruction* divisor, - HBasicBlock* basic_block) { +static HDiv* FindDivWithInputsInBasicBlock(HInstruction* dividend, + HInstruction* divisor, + HBasicBlock* basic_block) { for (const HUseListNode<HInstruction*>& use : dividend->GetUses()) { HInstruction* user = use.GetUser(); - if (user->GetBlock() == basic_block && user->IsDiv() && user->InputAt(1) == divisor) { - return user; + if (user->GetBlock() == basic_block && + user->IsDiv() && + user->InputAt(0) == dividend && + user->InputAt(1) == divisor) { + return user->AsDiv(); } } return nullptr; @@ -1900,7 +1907,7 @@ void InstructionSimplifierVisitor::TryToReuseDiv(HRem* rem) { } } - HInstruction* quotient = FindDivWithInputsInBasicBlock(dividend, divisor, basic_block); + HDiv* quotient = FindDivWithInputsInBasicBlock(dividend, divisor, basic_block); if (quotient == nullptr) { return; } @@ -2458,7 +2465,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) DCHECK(method != nullptr); DCHECK(method->IsStatic()); DCHECK(method->GetDeclaringClass() == system); - invoke->SetResolvedMethod(method); + invoke->SetResolvedMethod(method, !codegen_->GetGraph()->IsDebuggable()); // Sharpen the new invoke. Note that we do not update the dex method index of // the invoke, as we would need to look it up in the current dex file, and it // is unlikely that it exists. The most usual situation for such typed @@ -2647,15 +2654,13 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { // Collect args and check for unexpected uses. // We expect one call to a constructor with no arguments, one constructor fence (unless // eliminated), some number of append calls and one call to StringBuilder.toString(). - bool constructor_inlined = false; bool seen_constructor = false; bool seen_constructor_fence = false; bool seen_to_string = false; uint32_t format = 0u; uint32_t num_args = 0u; + bool has_fp_args = false; HInstruction* args[StringBuilderAppend::kMaxArgs]; // Added in reverse order. - // When inlining, `maybe_new_array` tracks an environment use that we want to allow. - HInstruction* maybe_new_array = nullptr; for (HBackwardInstructionIterator iter(block->GetInstructions()); !iter.Done(); iter.Advance()) { HInstruction* user = iter.Current(); // Instructions of interest apply to `sb`, skip those that do not involve `sb`. 
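On the FindDivWithInputsInBasicBlock change above (now returning HDiv* and matching both inputs): the surrounding TryToReuseDiv rewrite rests on the usual remainder identity, sketched here in scalar form with hypothetical names rather than HIR nodes:

  #include <cstdint>

  // Sketch only: once dividend / divisor is already available as `quotient`,
  // the remainder can be rebuilt without a second division (divisor != 0).
  static int32_t RemFromExistingDiv(int32_t dividend, int32_t divisor, int32_t quotient) {
    return dividend - quotient * divisor;
  }

Requiring InputAt(0) == dividend as well as InputAt(1) == divisor is what guarantees the reused quotient really is dividend / divisor.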
@@ -2700,6 +2705,14 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { case Intrinsics::kStringBuilderAppendLong: arg = StringBuilderAppend::Argument::kLong; break; + case Intrinsics::kStringBuilderAppendFloat: + arg = StringBuilderAppend::Argument::kFloat; + has_fp_args = true; + break; + case Intrinsics::kStringBuilderAppendDouble: + arg = StringBuilderAppend::Argument::kDouble; + has_fp_args = true; + break; case Intrinsics::kStringBuilderAppendCharSequence: { ReferenceTypeInfo rti = user->AsInvokeVirtual()->InputAt(1)->GetReferenceTypeInfo(); if (!rti.IsValid()) { @@ -2719,10 +2732,6 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { } break; } - case Intrinsics::kStringBuilderAppendFloat: - case Intrinsics::kStringBuilderAppendDouble: - // TODO: Unimplemented, needs to call FloatingDecimal.getBinaryToASCIIConverter(). - return false; default: { return false; } @@ -2736,25 +2745,13 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { format = (format << StringBuilderAppend::kBitsPerArg) | static_cast<uint32_t>(arg); args[num_args] = as_invoke_virtual->InputAt(1u); ++num_args; - } else if (!seen_constructor) { - // At this point, we should see the constructor. However, we might have inlined it so we have - // to take care of both cases. We accept only the constructor with no extra arguments. This - // means that if we inline it, we have to check it is setting its field to a new array. - if (user->IsInvokeStaticOrDirect() && - user->AsInvokeStaticOrDirect()->GetResolvedMethod() != nullptr && - user->AsInvokeStaticOrDirect()->GetResolvedMethod()->IsConstructor() && - user->AsInvokeStaticOrDirect()->GetNumberOfArguments() == 1u) { - constructor_inlined = false; - } else if (user->IsInstanceFieldSet() && - user->AsInstanceFieldSet()->GetFieldType() == DataType::Type::kReference && - user->AsInstanceFieldSet()->InputAt(0) == sb && - user->AsInstanceFieldSet()->GetValue()->IsNewArray()) { - maybe_new_array = user->AsInstanceFieldSet()->GetValue(); - constructor_inlined = true; - } else { - // We were expecting a constructor but we haven't seen it. Abort optimization. - return false; - } + } else if (user->IsInvokeStaticOrDirect() && + user->AsInvokeStaticOrDirect()->GetResolvedMethod() != nullptr && + user->AsInvokeStaticOrDirect()->GetResolvedMethod()->IsConstructor() && + user->AsInvokeStaticOrDirect()->GetNumberOfArguments() == 1u) { + // After arguments, we should see the constructor. + // We accept only the constructor with no extra arguments. + DCHECK(!seen_constructor); DCHECK(!seen_constructor_fence); seen_constructor = true; } else if (user->IsConstructorFence()) { @@ -2780,17 +2777,6 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { // Accept only calls on the StringBuilder (which shall all be removed). // TODO: Carve-out for const-string? Or rely on environment pruning (to be implemented)? if (holder->InputCount() == 0 || holder->InputAt(0) != sb) { - // When inlining the constructor, we have a NewArray and may have a LoadClass as an - // environment use. - if (constructor_inlined) { - if (holder == maybe_new_array) { - continue; - } - if (holder == maybe_new_array->InputAt(0)) { - DCHECK(holder->IsLoadClass()); - continue; - } - } return false; } } @@ -2798,9 +2784,9 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { // Create replacement instruction. 
HIntConstant* fmt = block->GetGraph()->GetIntConstant(static_cast<int32_t>(format)); ArenaAllocator* allocator = block->GetGraph()->GetAllocator(); - HStringBuilderAppend* append = - new (allocator) HStringBuilderAppend(fmt, num_args, allocator, invoke->GetDexPc()); - append->SetReferenceTypeInfo(invoke->GetReferenceTypeInfo()); + HStringBuilderAppend* append = new (allocator) HStringBuilderAppend( + fmt, num_args, has_fp_args, allocator, invoke->GetDexPc()); + append->SetReferenceTypeInfoIfValid(invoke->GetReferenceTypeInfo()); for (size_t i = 0; i != num_args; ++i) { append->SetArgumentAt(i, args[num_args - 1u - i]); } @@ -2824,33 +2810,6 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { while (sb->HasNonEnvironmentUses()) { block->RemoveInstruction(sb->GetUses().front().GetUser()); } - if (constructor_inlined) { - // We need to remove the inlined constructor instructions, - // and all remaining environment uses (if any). - DCHECK(sb->HasEnvironmentUses()); - DCHECK(maybe_new_array != nullptr); - DCHECK(maybe_new_array->IsNewArray()); - DCHECK(maybe_new_array->HasNonEnvironmentUses()); - HInstruction* fence = maybe_new_array->GetUses().front().GetUser(); - DCHECK(fence->IsConstructorFence()); - block->RemoveInstruction(fence); - block->RemoveInstruction(maybe_new_array); - if (sb->HasEnvironmentUses()) { - // We know the only remaining uses are from the LoadClass. - HInstruction* load_class = maybe_new_array->InputAt(0); - DCHECK(load_class->IsLoadClass()); - for (HEnvironment* env = load_class->GetEnvironment(); - env != nullptr; - env = env->GetParent()) { - for (size_t i = 0, size = env->Size(); i != size; ++i) { - if (env->GetInstructionAt(i) == sb) { - env->RemoveAsUserOfInput(i); - env->SetRawEnvAt(i, /*instruction=*/ nullptr); - } - } - } - } - } DCHECK(!sb->HasEnvironmentUses()); block->RemoveInstruction(sb); return true; diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index feea771096..98ebaafebc 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -17,11 +17,12 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" #include "optimizing_compiler_stats.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc index 1371ea7781..05a518d544 100644 --- a/compiler/optimizing/instruction_simplifier_arm.cc +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -23,7 +23,7 @@ #include "mirror/string.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { using helpers::CanFitInShifterOperand; using helpers::HasShifterOperand; @@ -31,7 +31,7 @@ using helpers::IsSubRightSubLeftShl; namespace arm { -class InstructionSimplifierArmVisitor : public HGraphVisitor { +class InstructionSimplifierArmVisitor final : public HGraphVisitor { public: InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats) : HGraphVisitor(graph), stats_(stats) {} diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h index fca9341d59..0517e4f49e 100644 --- a/compiler/optimizing/instruction_simplifier_arm.h +++ b/compiler/optimizing/instruction_simplifier_arm.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_ #define 
ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { namespace arm { class InstructionSimplifierArm : public HOptimization { diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index a6ec02012c..671900bd9d 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -21,7 +21,7 @@ #include "mirror/array-inl.h" #include "mirror/string.h" -namespace art { +namespace art HIDDEN { using helpers::CanFitInShifterOperand; using helpers::HasShifterOperand; @@ -31,7 +31,7 @@ namespace arm64 { using helpers::ShifterOperandSupportsExtension; -class InstructionSimplifierArm64Visitor : public HGraphVisitor { +class InstructionSimplifierArm64Visitor final : public HGraphVisitor { public: InstructionSimplifierArm64Visitor(HGraph* graph, OptimizingCompilerStats* stats) : HGraphVisitor(graph), stats_(stats) {} diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 8d93c01ebf..374638ab9e 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM64_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM64_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { namespace arm64 { class InstructionSimplifierArm64 : public HOptimization { diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index dc60ba62bb..34daae21ee 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -18,7 +18,7 @@ #include "mirror/array-inl.h" -namespace art { +namespace art HIDDEN { namespace { @@ -244,7 +244,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access, // The access may require a runtime call or the original array pointer. 
return false; } - if (kEmitCompilerReadBarrier && + if (gUseReadBarrier && !kUseBakerReadBarrier && access->IsArrayGet() && access->GetType() == DataType::Type::kReference) { diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index 876ed21a22..ddc3a867b8 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { namespace helpers { diff --git a/compiler/optimizing/instruction_simplifier_test.cc b/compiler/optimizing/instruction_simplifier_test.cc index c7c5b12e25..966f5b91cf 100644 --- a/compiler/optimizing/instruction_simplifier_test.cc +++ b/compiler/optimizing/instruction_simplifier_test.cc @@ -26,13 +26,15 @@ #include "optimizing/data_type.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { namespace mirror { class ClassExt; class Throwable; } // namespace mirror +static constexpr bool kDebugSimplifierTests = false; + template<typename SuperClass> class InstructionSimplifierTestBase : public SuperClass, public OptimizingUnitTestHelper { public: @@ -49,6 +51,19 @@ class InstructionSimplifierTestBase : public SuperClass, public OptimizingUnitTe SuperClass::TearDown(); gLogVerbosity.compiler = false; } + + void PerformSimplification(const AdjacencyListGraph& blks) { + if (kDebugSimplifierTests) { + LOG(INFO) << "Pre simplification " << blks; + } + graph_->ClearDominanceInformation(); + graph_->BuildDominatorTree(); + InstructionSimplifier simp(graph_, /*codegen=*/nullptr); + simp.Run(); + if (kDebugSimplifierTests) { + LOG(INFO) << "Post simplify " << blks; + } + } }; class InstructionSimplifierTest : public InstructionSimplifierTestBase<CommonCompilerTest> {}; @@ -197,13 +212,7 @@ TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetNoMerge) { SetupExit(exit); - LOG(INFO) << "Pre simplification " << blks; - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post simplify " << blks; + PerformSimplification(blks); EXPECT_INS_RETAINED(read_end); @@ -289,13 +298,7 @@ TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetMerge) { SetupExit(exit); - LOG(INFO) << "Pre simplification " << blks; - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post simplify " << blks; + PerformSimplification(blks); EXPECT_FALSE(obj3->CanBeNull()); EXPECT_INS_RETAINED(read_end); @@ -373,13 +376,7 @@ TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetNoNull) { SetupExit(exit); - LOG(INFO) << "Pre simplification " << blks; - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post simplify " << blks; + PerformSimplification(blks); EXPECT_FALSE(obj1->CanBeNull()); EXPECT_FALSE(obj2->CanBeNull()); @@ -464,16 +461,7 @@ TEST_P(InstanceOfInstructionSimplifierTestGroup, ExactClassInstanceOfOther) { SetupExit(exit); - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - - LOG(INFO) << "Pre simplification " << blks; - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post simplify " << blks; + PerformSimplification(blks); if (!GetConstantResult() || GetParam() == InstanceOfKind::kSelf) { EXPECT_INS_RETAINED(target_klass); @@ -532,16 +520,7 @@ TEST_P(InstanceOfInstructionSimplifierTestGroup, ExactClassCheckCastOther) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - - LOG(INFO) << "Pre simplification " << blks; - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post simplify " << blks; + PerformSimplification(blks); if (!GetConstantResult() || GetParam() == InstanceOfKind::kSelf) { EXPECT_INS_RETAINED(target_klass); diff --git a/compiler/optimizing/instruction_simplifier_x86.cc b/compiler/optimizing/instruction_simplifier_x86.cc index 2d8f94a85b..5a4345d589 100644 --- a/compiler/optimizing/instruction_simplifier_x86.cc +++ b/compiler/optimizing/instruction_simplifier_x86.cc @@ -17,11 +17,11 @@ #include "instruction_simplifier_x86_shared.h" #include "code_generator_x86.h" -namespace art { +namespace art HIDDEN { namespace x86 { -class InstructionSimplifierX86Visitor : public HGraphVisitor { +class InstructionSimplifierX86Visitor final : public HGraphVisitor { public: InstructionSimplifierX86Visitor(HGraph* graph, CodeGenerator* codegen, diff --git a/compiler/optimizing/instruction_simplifier_x86.h b/compiler/optimizing/instruction_simplifier_x86.h index 6f10006db2..25ebe203b8 100644 --- a/compiler/optimizing/instruction_simplifier_x86.h +++ b/compiler/optimizing/instruction_simplifier_x86.h @@ -16,10 +16,11 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; namespace x86 { diff --git a/compiler/optimizing/instruction_simplifier_x86_64.cc b/compiler/optimizing/instruction_simplifier_x86_64.cc index 56c6b414d7..9ba1a8a960 100644 --- a/compiler/optimizing/instruction_simplifier_x86_64.cc +++ b/compiler/optimizing/instruction_simplifier_x86_64.cc @@ -17,11 +17,11 @@ #include "instruction_simplifier_x86_shared.h" #include "code_generator_x86_64.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { -class InstructionSimplifierX86_64Visitor : public HGraphVisitor { +class InstructionSimplifierX86_64Visitor final : public HGraphVisitor { public: InstructionSimplifierX86_64Visitor(HGraph* graph, CodeGenerator* codegen, diff --git a/compiler/optimizing/instruction_simplifier_x86_64.h b/compiler/optimizing/instruction_simplifier_x86_64.h index 6cae24d11a..1654dc4774 100644 --- a/compiler/optimizing/instruction_simplifier_x86_64.h +++ b/compiler/optimizing/instruction_simplifier_x86_64.h @@ -16,10 +16,11 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.cc b/compiler/optimizing/instruction_simplifier_x86_shared.cc index 2805abb2bb..74c5ca2466 100644 --- 
a/compiler/optimizing/instruction_simplifier_x86_shared.cc +++ b/compiler/optimizing/instruction_simplifier_x86_shared.cc @@ -14,9 +14,10 @@ */ #include "instruction_simplifier_x86_shared.h" + #include "nodes_x86.h" -namespace art { +namespace art HIDDEN { bool TryCombineAndNot(HAnd* instruction) { DataType::Type type = instruction->GetType(); diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.h b/compiler/optimizing/instruction_simplifier_x86_shared.h index 7f94d7ea4c..1a44d0fdb5 100644 --- a/compiler/optimizing/instruction_simplifier_x86_shared.h +++ b/compiler/optimizing/instruction_simplifier_x86_shared.h @@ -16,13 +16,16 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_ +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { + bool TryCombineAndNot(HAnd* instruction); bool TryGenerateResetLeastSetBit(HAnd* instruction); bool TryGenerateMaskUptoLeastSetBit(HXor* instruction); bool AreLeastSetBitInputs(HInstruction* to_test, HInstruction* other); + } // namespace art #endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_ diff --git a/compiler/optimizing/intrinsic_objects.cc b/compiler/optimizing/intrinsic_objects.cc index 5f6f562161..7e542117a9 100644 --- a/compiler/optimizing/intrinsic_objects.cc +++ b/compiler/optimizing/intrinsic_objects.cc @@ -22,7 +22,7 @@ #include "image.h" #include "obj_ptr-inl.h" -namespace art { +namespace art HIDDEN { static constexpr size_t kIntrinsicObjectsOffset = enum_cast<size_t>(ImageHeader::kIntrinsicObjectsStart); diff --git a/compiler/optimizing/intrinsic_objects.h b/compiler/optimizing/intrinsic_objects.h index ed764bd4b2..d750f2934b 100644 --- a/compiler/optimizing/intrinsic_objects.h +++ b/compiler/optimizing/intrinsic_objects.h @@ -19,9 +19,10 @@ #include "base/bit_field.h" #include "base/bit_utils.h" +#include "base/macros.h" #include "base/mutex.h" -namespace art { +namespace art HIDDEN { class ClassLinker; template <class MirrorType> class ObjPtr; @@ -56,15 +57,15 @@ class IntrinsicObjects { } // Functions for retrieving data for Integer.valueOf(). 
- static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache( + EXPORT static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache( Thread* self, ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_); - static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache( + EXPORT static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache( ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) REQUIRES_SHARED(Locks::mutator_lock_); - static ObjPtr<mirror::Object> GetIntegerValueOfObject( + EXPORT static ObjPtr<mirror::Object> GetIntegerValueOfObject( ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, uint32_t index) REQUIRES_SHARED(Locks::mutator_lock_); - static MemberOffset GetIntegerValueOfArrayDataOffset( + EXPORT static MemberOffset GetIntegerValueOfArrayDataOffset( ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) REQUIRES_SHARED(Locks::mutator_lock_); diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index f2d2b45da9..774deec438 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -32,7 +32,7 @@ #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" -namespace art { +namespace art HIDDEN { std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { switch (intrinsic) { @@ -171,6 +171,7 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, if (!CanReferenceBootImageObjects(invoke, compiler_options)) { return; } + HInstruction* const input = invoke->InputAt(0); if (compiler_options.IsBootImage()) { if (!compiler_options.IsImageClass(kIntegerCacheDescriptor) || !compiler_options.IsImageClass(kIntegerDescriptor)) { @@ -207,8 +208,8 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, CHECK_EQ(value_field->GetInt(current_object), low + i); } } - if (invoke->InputAt(0)->IsIntConstant()) { - int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (input->IsIntConstant()) { + int32_t value = input->AsIntConstant()->GetValue(); if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < static_cast<uint32_t>(high - low + 1)) { // No call, we shall use direct pointer to the Integer object. @@ -232,8 +233,8 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, } else { DCHECK(compiler_options.IsAotCompiler()); DCHECK(CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache)); - if (invoke->InputAt(0)->IsIntConstant()) { - int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (input->IsIntConstant()) { + int32_t value = input->AsIntConstant()->GetValue(); // Retrieve the `value` from the lowest cached Integer. 
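The "namespace art HIDDEN" and EXPORT annotations appearing throughout these hunks are symbol-visibility controls: the conventional expansion of such macros is to ELF visibility attributes, hiding every libart-compiler symbol by default and re-exporting only the few entry points other libraries need. A minimal sketch under that assumption (the real macro definitions are not reproduced in this diff and may differ):

// Assumed expansions; the actual HIDDEN/EXPORT macros may differ.
#define HIDDEN __attribute__((visibility("hidden")))
#define EXPORT __attribute__((visibility("default")))

namespace art HIDDEN {  // every symbol in the namespace defaults to hidden

int InternalHelper() { return 42; }  // not visible outside the shared library

EXPORT int PublicEntryPoint() {      // selectively re-exported for other libraries
  return InternalHelper();
}

}  // namespace art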
ObjPtr<mirror::Object> low_integer = IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u); @@ -255,11 +256,11 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, ArenaAllocator* allocator = codegen->GetGraph()->GetAllocator(); LocationSummary* locations = new (allocator) LocationSummary(invoke, call_kind, kIntrinsified); if (call_kind == LocationSummary::kCallOnMainOnly) { - locations->SetInAt(0, Location::RegisterOrConstant(invoke->InputAt(0))); + locations->SetInAt(0, Location::RegisterOrConstant(input)); locations->AddTemp(first_argument_location); locations->SetOut(return_location); } else { - locations->SetInAt(0, Location::ConstantLocation(invoke->InputAt(0)->AsConstant())); + locations->SetInAt(0, Location::ConstantLocation(input)); locations->SetOut(Location::RequiresRegister()); } } @@ -392,7 +393,7 @@ void IntrinsicVisitor::CreateReferenceGetReferentLocations(HInvoke* invoke, } void IntrinsicVisitor::CreateReferenceRefersToLocations(HInvoke* invoke) { - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { // Unimplemented for non-Baker read barrier. return; } diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 5109882295..893cd04411 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -17,12 +17,13 @@ #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_ #define ART_COMPILER_OPTIMIZING_INTRINSICS_H_ +#include "base/macros.h" #include "code_generator.h" #include "nodes.h" #include "optimization.h" #include "parallel_move_resolver.h" -namespace art { +namespace art HIDDEN { class DexFile; diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 646f4f2ea7..d2dbaa32e3 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -46,7 +46,7 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) #include "aarch64/macro-assembler-aarch64.h" #pragma GCC diagnostic pop -namespace art { +namespace art HIDDEN { namespace arm64 { @@ -55,7 +55,6 @@ using helpers::DRegisterFrom; using helpers::HeapOperand; using helpers::LocationFrom; using helpers::InputCPURegisterOrZeroRegAt; -using helpers::IsConstantZeroBitPattern; using helpers::OperandFrom; using helpers::RegisterFrom; using helpers::SRegisterFrom; @@ -92,7 +91,7 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp) : SlowPathCodeARM64(instruction), tmp_(tmp) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); } @@ -711,7 +710,7 @@ static void GenUnsafeGet(HInvoke* invoke, Location trg_loc = locations->Out(); Register trg = RegisterFrom(trg_loc, type); - if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) { // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case. 
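The pervasive kEmitCompilerReadBarrier to gUseReadBarrier rename in these hunks swaps a compile-time constant for what appears to be a runtime boolean, so the read-barrier decision is no longer folded away at build time, while kUseBakerReadBarrier remains a build-time choice. A simplified sketch of the difference (the flag's initialization and the surrounding names are assumptions):

#include <cstdio>

constexpr bool kUseBakerReadBarrier = true;  // still a compile-time choice
bool gUseReadBarrier = false;                // assumed: fixed once when the runtime starts

void EmitReferenceLoad() {
  // With a constexpr flag this branch is folded away at compile time; with a
  // runtime flag, one compiler binary can serve both GC configurations.
  if (gUseReadBarrier && kUseBakerReadBarrier) {
    std::puts("emit Baker read-barrier fast path");
  } else {
    std::puts("emit plain reference load");
  }
}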
Register temp = WRegisterFrom(locations->GetTemp(0)); MacroAssembler* masm = codegen->GetVIXLAssembler(); @@ -754,7 +753,7 @@ static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) { } static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); + bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -1096,7 +1095,7 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) } static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) { - const bool can_call = kEmitCompilerReadBarrier && IsUnsafeCASObject(invoke); + const bool can_call = gUseReadBarrier && IsUnsafeCASObject(invoke); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -1448,7 +1447,7 @@ static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM6 vixl::aarch64::Label* exit_loop = &exit_loop_label; vixl::aarch64::Label* cmp_failure = &exit_loop_label; - if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) { + if (gUseReadBarrier && type == DataType::Type::kReference) { // We need to store the `old_value` in a non-scratch register to make sure // the read barrier in the slow path does not clobber it. old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path. @@ -1523,12 +1522,12 @@ void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* in } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } CreateUnsafeCASLocations(allocator_, invoke); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // We need two non-scratch temporary registers for read barrier. LocationSummary* locations = invoke->GetLocations(); if (kUseBakerReadBarrier) { @@ -1578,7 +1577,7 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invok } void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); GenUnsafeCas(invoke, DataType::Type::kReference, codegen_); } @@ -2576,9 +2575,9 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ Bind(&done); } -// Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native -// implementation there for longer copy lengths. -static constexpr int32_t kSystemArrayCopyCharThreshold = 32; +// This value is greater than ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, +// so if we choose to jump to the slow path we will end up in the native implementation. 
+static constexpr int32_t kSystemArrayCopyCharThreshold = 192; static void SetSystemArrayCopyLocationRequires(LocationSummary* locations, uint32_t at, @@ -2710,11 +2709,13 @@ static void GenSystemArrayCopyAddresses(MacroAssembler* masm, __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift)); } - if (copy_length.IsConstant()) { - int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue(); - __ Add(src_end, src_base, element_size * constant); - } else { - __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift)); + if (src_end.IsValid()) { + if (copy_length.IsConstant()) { + int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue(); + __ Add(src_end, src_base, element_size * constant); + } else { + __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift)); + } } } @@ -2745,13 +2746,14 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { if (!length.IsConstant()) { // Merge the following two comparisons into one: // If the length is negative, bail out (delegate to libcore's native implementation). - // If the length > 32 then (currently) prefer libcore's native implementation. + // If the length > kSystemArrayCopyCharThreshold then (currently) prefer libcore's + // native implementation. __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold); __ B(slow_path->GetEntryLabel(), hi); } else { // We have already checked in the LocationsBuilder for the constant case. DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0); - DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32); + DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), kSystemArrayCopyCharThreshold); } Register src_curr_addr = WRegisterFrom(locations->GetTemp(0)); @@ -2787,21 +2789,102 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { length, src_curr_addr, dst_curr_addr, - src_stop_addr); + Register()); // Iterate over the arrays and do a raw copy of the chars. const int32_t char_size = DataType::Size(DataType::Type::kUint16); UseScratchRegisterScope temps(masm); - Register tmp = temps.AcquireW(); - vixl::aarch64::Label loop, done; - __ Bind(&loop); - __ Cmp(src_curr_addr, src_stop_addr); - __ B(&done, eq); - __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex)); - __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex)); - __ B(&loop); - __ Bind(&done); + // We split processing of the array in two parts: head and tail. + // A first loop handles the head by copying a block of characters per + // iteration (see: chars_per_block). + // A second loop handles the tail by copying the remaining characters. + // If the copy length is not constant, we copy them one-by-one. + // If the copy length is constant, we optimize by always unrolling the tail + // loop, and also unrolling the head loop when the copy length is small (see: + // unroll_threshold). + // + // Both loops are inverted for better performance, meaning they are + // implemented as conditional do-while loops. + // Here, the loop condition is first checked to determine if there are + // sufficient chars to run an iteration, then we enter the do-while: an + // iteration is performed followed by a conditional branch only if another + // iteration is necessary. As opposed to a standard while-loop, this inversion + // can save some branching (e.g. 
we don't branch back to the initial condition + // at the end of every iteration only to potentially immediately branch + // again). + // + // A full block of chars is subtracted and added before and after the head + // loop, respectively. This ensures that any remaining length after each + // head loop iteration means there is a full block remaining, reducing the + // number of conditional checks required on every iteration. + constexpr int32_t chars_per_block = 4; + constexpr int32_t unroll_threshold = 2 * chars_per_block; + vixl::aarch64::Label loop1, loop2, pre_loop2, done; + + Register length_tmp = src_stop_addr.W(); + Register tmp = temps.AcquireRegisterOfSize(char_size * chars_per_block * kBitsPerByte); + + auto emitHeadLoop = [&]() { + __ Bind(&loop1); + __ Ldr(tmp, MemOperand(src_curr_addr, char_size * chars_per_block, PostIndex)); + __ Subs(length_tmp, length_tmp, chars_per_block); + __ Str(tmp, MemOperand(dst_curr_addr, char_size * chars_per_block, PostIndex)); + __ B(&loop1, ge); + }; + + auto emitTailLoop = [&]() { + __ Bind(&loop2); + __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex)); + __ Subs(length_tmp, length_tmp, 1); + __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex)); + __ B(&loop2, gt); + }; + + auto emitUnrolledTailLoop = [&](const int32_t tail_length) { + DCHECK_LT(tail_length, 4); + + // Don't use post-index addressing, and instead add a constant offset later. + if ((tail_length & 2) != 0) { + __ Ldr(tmp.W(), MemOperand(src_curr_addr)); + __ Str(tmp.W(), MemOperand(dst_curr_addr)); + } + if ((tail_length & 1) != 0) { + const int32_t offset = (tail_length & ~1) * char_size; + __ Ldrh(tmp, MemOperand(src_curr_addr, offset)); + __ Strh(tmp, MemOperand(dst_curr_addr, offset)); + } + }; + + if (length.IsConstant()) { + const int32_t constant_length = length.GetConstant()->AsIntConstant()->GetValue(); + if (constant_length >= unroll_threshold) { + __ Mov(length_tmp, constant_length - chars_per_block); + emitHeadLoop(); + } else { + static_assert(unroll_threshold == 8, "The unroll_threshold must be 8."); + // Fully unroll both the head and tail loops. + if ((constant_length & 4) != 0) { + __ Ldr(tmp, MemOperand(src_curr_addr, 4 * char_size, PostIndex)); + __ Str(tmp, MemOperand(dst_curr_addr, 4 * char_size, PostIndex)); + } + } + emitUnrolledTailLoop(constant_length % chars_per_block); + } else { + Register length_reg = WRegisterFrom(length); + __ Subs(length_tmp, length_reg, chars_per_block); + __ B(&pre_loop2, lt); + + emitHeadLoop(); + + __ Bind(&pre_loop2); + __ Adds(length_tmp, length_tmp, chars_per_block); + __ B(&done, eq); + + emitTailLoop(); + } + + __ Bind(&done); __ Bind(slow_path->GetExitLabel()); } @@ -2814,7 +2897,7 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128; void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. 
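The rewritten SystemArrayCopyChar code above also raises kSystemArrayCopyCharThreshold from 32 to 192, so longer copies still fall through to libcore's native implementation via the slow path, and it replaces the one-char-at-a-time loop with a 4-char (8-byte) block head loop plus a short tail, both written as inverted do-while loops. A plain C++ sketch of the non-constant-length path (the constant-length unrolling is omitted), with the rough instruction mapping in comments:

#include <cstdint>
#include <cstring>

void CopyChars(const uint16_t* src, uint16_t* dst, int32_t length) {
  constexpr int32_t kCharsPerBlock = 4;
  int32_t remaining = length - kCharsPerBlock;  // ~ Subs(length_tmp, length_reg, 4)
  if (remaining >= 0) {                         // ~ B(&pre_loop2, lt) not taken
    do {                                        // loop1: copy one 8-byte block per iteration
      std::memcpy(dst, src, kCharsPerBlock * sizeof(uint16_t));  // ~ Ldr/Str with post-index
      src += kCharsPerBlock;
      dst += kCharsPerBlock;
      remaining -= kCharsPerBlock;              // ~ Subs(length_tmp, length_tmp, 4)
    } while (remaining >= 0);                   // ~ B(&loop1, ge)
  }
  remaining += kCharsPerBlock;                  // ~ Adds(length_tmp, length_tmp, 4); 0..3 left
  while (remaining > 0) {                       // loop2: finish the tail one char at a time
    *dst++ = *src++;                            // ~ Ldrh/Strh with post-index
    --remaining;
  }
}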
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2866,7 +2949,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Temporary register IP0, obtained from the VIXL scratch register // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64 // (because that register is clobbered by ReadBarrierMarkRegX @@ -2884,7 +2967,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); MacroAssembler* masm = GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -2991,7 +3074,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { UseScratchRegisterScope temps(masm); Location temp3_loc; // Used only for Baker read barrier. Register temp3; - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { temp3_loc = locations->GetTemp(2); temp3 = WRegisterFrom(temp3_loc); } else { @@ -3004,7 +3087,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { if (!optimizations.GetSourceIsNonPrimitiveArray()) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, @@ -3165,7 +3248,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, temp1_loc, @@ -3215,7 +3298,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { __ Cbz(WRegisterFrom(length), &done); } - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // TODO: Also convert this intrinsic to the IsGcMarking strategy? // SystemArrayCopy implementation for Baker read barriers (see @@ -3335,7 +3418,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. 
- codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false); + codegen_->MarkGCCard(dest.W(), Register(), /* emit_null_check= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -3451,7 +3534,7 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) { IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && invoke->GetLocations() != nullptr) { + if (gUseReadBarrier && kUseBakerReadBarrier && invoke->GetLocations() != nullptr) { invoke->GetLocations()->AddTemp(Location::RequiresRegister()); } } @@ -3466,7 +3549,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) { SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); codegen_->AddSlowPath(slow_path); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Check self->GetWeakRefAccessEnabled(). UseScratchRegisterScope temps(masm); Register temp = temps.AcquireW(); @@ -3493,7 +3576,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) { // Load the value from the field. uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value(); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, out, WRegisterFrom(obj), @@ -3533,7 +3616,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceRefersTo(HInvoke* invoke) { __ Cmp(tmp, other); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { DCHECK(kUseBakerReadBarrier); vixl::aarch64::Label calculate_result; @@ -4629,7 +4712,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke, method.X(), ArtField::DeclaringClassOffset().Int32Value(), /*fixup_label=*/ nullptr, - kCompilerReadBarrierOption); + gCompilerReadBarrierOption); } } } else { @@ -4673,8 +4756,8 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) { uint32_t number_of_arguments = invoke->GetNumberOfArguments(); for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) { HInstruction* arg = invoke->InputAt(arg_index); - if (IsConstantZeroBitPattern(arg)) { - locations->SetInAt(arg_index, Location::ConstantLocation(arg->AsConstant())); + if (IsZeroBitPattern(arg)) { + locations->SetInAt(arg_index, Location::ConstantLocation(arg)); } else if (DataType::IsFloatingPointType(arg->GetType())) { locations->SetInAt(arg_index, Location::RequiresFpuRegister()); } else { @@ -4683,7 +4766,7 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) { } // Add a temporary for offset. - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields. // To preserve the offset value across the non-Baker read barrier slow path // for loading the declaring class, use a fixed callee-save register. 
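The /* value_can_be_null= */ to /* emit_null_check= */ renames on the MarkGCCard calls keep the same boolean values; the new name describes what the code generator does (emit a null check before dirtying the card) rather than a property of the stored value. A heavily simplified sketch of what the flag controls; the card size and dirty marker below are illustrative assumptions, not ART's actual constants:

#include <cstdint>

void MarkGCCard(uint8_t* card_table, uintptr_t dest_object, uintptr_t stored_value,
                bool emit_null_check) {
  constexpr uintptr_t kCardShift = 10;  // assumed 1 KiB cards
  constexpr uint8_t kCardDirty = 0x70;  // assumed dirty-card marker
  if (emit_null_check && stored_value == 0) {
    return;  // nothing was stored, so there is no card to dirty
  }
  card_table[dest_object >> kCardShift] = kCardDirty;
}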
@@ -4706,7 +4789,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke) { return; } - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && invoke->GetType() == DataType::Type::kReference && invoke->GetIntrinsic() != Intrinsics::kVarHandleGet && invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) { @@ -4746,7 +4829,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, DCHECK(use_load_acquire || order == std::memory_order_relaxed); // Load the value from the target location. - if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) { // Piggy-back on the field load path using introspection for the Baker read barrier. // The `target.offset` is a temporary, use it for field address. Register tmp_ptr = target.offset.X(); @@ -4898,7 +4981,7 @@ static void GenerateVarHandleSet(HInvoke* invoke, } if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) { - codegen->MarkGCCard(target.object, Register(value), /*value_can_be_null=*/ true); + codegen->MarkGCCard(target.object, Register(value), /* emit_null_check= */ true); } if (slow_path != nullptr) { @@ -4947,7 +5030,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo uint32_t number_of_arguments = invoke->GetNumberOfArguments(); DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u); - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && value_type == DataType::Type::kReference) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field. This breaks the read barriers @@ -4961,7 +5044,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo LocationSummary* locations = CreateVarHandleCommonLocations(invoke); - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { // We need callee-save registers for both the class object and offset instead of // the temporaries reserved in CreateVarHandleCommonLocations(). static_assert(POPCOUNT(kArm64CalleeSaveRefSpills) >= 2u); @@ -4985,16 +5068,16 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo // Add a temporary for old value and exclusive store result if floating point // `expected` and/or `new_value` take scratch registers. size_t available_scratch_registers = - (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) ? 1u : 0u) + - (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) ? 1u : 0u); + (IsZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) ? 1u : 0u) + + (IsZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) ? 1u : 0u); size_t temps_needed = /* pointer, old value, store result */ 3u - available_scratch_registers; // We can reuse the declaring class (if present) and offset temporary. 
if (temps_needed > old_temp_count) { locations->AddRegisterTemps(temps_needed - old_temp_count); } } else if ((value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) && - !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) && - !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) && + !IsZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) && + !IsZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) && GetExpectedVarHandleCoordinatesCount(invoke) == 2u) { // Allocate a normal temporary for store result in the non-native byte order path // because scratch registers are used by the byte-swapped `expected` and `new_value`. @@ -5002,7 +5085,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo locations->AddTemp(Location::RequiresRegister()); } } - if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + if (gUseReadBarrier && value_type == DataType::Type::kReference) { // Add a temporary for the `old_value_temp` in slow path. locations->AddTemp(Location::RequiresRegister()); } @@ -5068,7 +5151,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, // except for references that need the offset for the read barrier. UseScratchRegisterScope temps(masm); Register tmp_ptr = target.offset.X(); - if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + if (gUseReadBarrier && value_type == DataType::Type::kReference) { tmp_ptr = temps.AcquireX(); } __ Add(tmp_ptr, target.object.X(), target.offset.X()); @@ -5151,7 +5234,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, vixl::aarch64::Label* exit_loop = &exit_loop_label; vixl::aarch64::Label* cmp_failure = &exit_loop_label; - if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + if (gUseReadBarrier && value_type == DataType::Type::kReference) { // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register. size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke); @@ -5296,7 +5379,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, return; } - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && invoke->GetType() == DataType::Type::kReference) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field, thus seeing the new value @@ -5316,7 +5399,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, DCHECK(get_and_update_op == GetAndUpdateOp::kSet); // We can reuse the declaring class temporary if present. if (old_temp_count == 1u && - !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) { + !IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) { // Add a temporary for `old_value` if floating point `new_value` takes a scratch register. 
locations->AddTemp(Location::RequiresRegister()); } @@ -5327,7 +5410,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, if (old_temp_count == 1u && (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd) && GetExpectedVarHandleCoordinatesCount(invoke) == 2u && - !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) { + !IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) { DataType::Type value_type = GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u); if (value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) { @@ -5372,7 +5455,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, // except for references that need the offset for the non-Baker read barrier. UseScratchRegisterScope temps(masm); Register tmp_ptr = target.offset.X(); - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && value_type == DataType::Type::kReference) { tmp_ptr = temps.AcquireX(); } @@ -5402,7 +5485,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, // the new value unless it is zero bit pattern (+0.0f or +0.0) and need another one // in GenerateGetAndUpdate(). We have allocated a normal temporary to handle that. old_value = CPURegisterFrom(locations->GetTemp(1u), load_store_type); - } else if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) && + } else if ((gUseReadBarrier && kUseBakerReadBarrier) && value_type == DataType::Type::kReference) { // Load the old value initially to a scratch register. // We shall move it to `out` later with a read barrier. @@ -5450,7 +5533,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, __ Sxtb(out.W(), old_value.W()); } else if (value_type == DataType::Type::kInt16) { __ Sxth(out.W(), old_value.W()); - } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + } else if (gUseReadBarrier && value_type == DataType::Type::kReference) { if (kUseBakerReadBarrier) { codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(out.W(), old_value.W()); } else { @@ -5647,7 +5730,7 @@ void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) { // Byte order check. For native byte order return to the main path. if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet && - IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) { + IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) { // There is no reason to differentiate between native byte order and byte-swap // for setting a zero bit pattern. Just return to the main path. 
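Several hunks above replace the open-coded arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern() test (and arm64's local IsConstantZeroBitPattern helper) with a shared IsZeroBitPattern(arg) call for +0.0f/+0.0/0 arguments. A hedged guess at its shape, using stand-in node types rather than ART's HInstruction hierarchy:

struct HConstant {
  explicit HConstant(bool zero) : zero_(zero) {}
  bool IsZeroBitPattern() const { return zero_; }
  bool zero_;
};

struct HInstruction {
  explicit HInstruction(const HConstant* constant) : constant_(constant) {}
  bool IsConstant() const { return constant_ != nullptr; }
  const HConstant* AsConstant() const { return constant_; }
  const HConstant* constant_;  // stand-in: non-null only for constant nodes
};

// Folds the repeated check into one helper shared by all backends.
inline bool IsZeroBitPattern(const HInstruction* instruction) {
  return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern();
}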
__ B(GetNativeByteOrderLabel()); @@ -5677,42 +5760,9 @@ void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) { __ B(GetExitLabel()); } -UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf); -UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendObject); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendString); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharSequence); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharArray); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendBoolean); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendChar); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendInt); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendLong); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendFloat); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendDouble); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString); -UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopyByte); -UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopyInt); - -// 1.8. -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject) - -UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvokeExact) -UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvoke) - -// OpenJDK 11 -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetObject) +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARM64, Name) +UNIMPLEMENTED_INTRINSIC_LIST_ARM64(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED UNREACHABLE_INTRINSICS(ARM64) diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index 9c46efddec..a0ccf87f7b 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM64_H_ #define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM64_H_ +#include "base/macros.h" #include "intrinsics.h" namespace vixl { @@ -27,7 +28,7 @@ class MacroAssembler; } // namespace aarch64 } // namespace vixl -namespace art { +namespace art HIDDEN { class ArenaAllocator; class HInvokeStaticOrDirect; diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index d850cadc2b..266b5bc799 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -34,7 +34,7 @@ #include "aarch32/constants-aarch32.h" -namespace art { +namespace art HIDDEN { namespace arm { #define __ assembler->GetVIXLAssembler()-> @@ -120,7 +120,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { public: explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); } @@ -1242,7 +1242,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invo void 
IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -1265,7 +1265,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) { locations->SetInAt(4, Location::RequiresRegister()); } - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Temporary register IP cannot be used in // ReadBarrierSystemArrayCopySlowPathARM (because that register // is clobbered by ReadBarrierMarkRegX entry points). Get an extra @@ -1339,7 +1339,7 @@ static void CheckPosition(ArmVIXLAssembler* assembler, void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); ArmVIXLAssembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1453,7 +1453,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { if (!optimizations.GetSourceIsNonPrimitiveArray()) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( @@ -1584,7 +1584,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false); @@ -1621,7 +1621,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false); } - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // TODO: Also convert this intrinsic to the IsGcMarking strategy? // SystemArrayCopy implementation for Baker read barriers (see @@ -1723,7 +1723,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. - codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null= */ false); + codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* emit_null_check= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -2511,7 +2511,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); codegen_->AddSlowPath(slow_path); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Check self->GetWeakRefAccessEnabled(). 
UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); @@ -2539,7 +2539,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { // Load the value from the field. uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value(); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, out, RegisterFrom(obj), @@ -2587,7 +2587,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) { assembler->MaybeUnpoisonHeapReference(tmp); codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile. - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { DCHECK(kUseBakerReadBarrier); vixl32::Label calculate_result; @@ -2613,7 +2613,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) { __ Bind(&calculate_result); } else { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!gUseReadBarrier); __ Sub(out, tmp, other); } @@ -2732,7 +2732,7 @@ static void GenerateIntrinsicGet(HInvoke* invoke, } break; case DataType::Type::kReference: - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Piggy-back on the field load path using introspection for the Baker read barrier. vixl32::Register temp = RegisterFrom(maybe_temp); __ Add(temp, base, offset); @@ -2777,7 +2777,7 @@ static void GenerateIntrinsicGet(HInvoke* invoke, codegen->GenerateMemoryBarrier( seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny); } - if (type == DataType::Type::kReference && !(kEmitCompilerReadBarrier && kUseBakerReadBarrier)) { + if (type == DataType::Type::kReference && !(gUseReadBarrier && kUseBakerReadBarrier)) { Location base_loc = LocationFrom(base); Location index_loc = LocationFrom(offset); codegen->MaybeGenerateReadBarrierSlow(invoke, out, out, base_loc, /* offset=*/ 0u, index_loc); @@ -2802,7 +2802,7 @@ static void CreateUnsafeGetLocations(HInvoke* invoke, CodeGeneratorARMVIXL* codegen, DataType::Type type, bool atomic) { - bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); + bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); LocationSummary* locations = new (allocator) LocationSummary(invoke, @@ -2818,7 +2818,7 @@ static void CreateUnsafeGetLocations(HInvoke* invoke, locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); - if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) || + if ((gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) || (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier, @@ -2837,7 +2837,7 @@ static void GenUnsafeGet(HInvoke* invoke, vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only. 
Location out = locations->Out(); Location maybe_temp = Location::NoLocation(); - if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) || + if ((gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) || (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) { maybe_temp = locations->GetTemp(0); } @@ -3470,7 +3470,7 @@ static void GenerateCompareAndSet(CodeGeneratorARMVIXL* codegen, // branch goes to the read barrier slow path that clobbers `success` anyway. bool init_failure_for_cmp = success.IsValid() && - !(kEmitCompilerReadBarrier && type == DataType::Type::kReference && expected.IsRegister()); + !(gUseReadBarrier && type == DataType::Type::kReference && expected.IsRegister()); // Instruction scheduling: Loading a constant between LDREX* and using the loaded value // is essentially free, so prepare the failure value here if we can. bool init_failure_for_cmp_early = @@ -3655,7 +3655,7 @@ class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL { }; static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) { - const bool can_call = kEmitCompilerReadBarrier && IsUnsafeCASObject(invoke); + const bool can_call = gUseReadBarrier && IsUnsafeCASObject(invoke); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -3706,7 +3706,7 @@ static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMV vixl32::Label* exit_loop = &exit_loop_label; vixl32::Label* cmp_failure = &exit_loop_label; - if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) { + if (gUseReadBarrier && type == DataType::Type::kReference) { // If marking, check if the stored reference is a from-space reference to the same // object as the to-space reference `expected`. If so, perform a custom CAS loop. ReadBarrierCasSlowPathARMVIXL* slow_path = @@ -3770,7 +3770,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* i } void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers (b/173104084). - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -3798,7 +3798,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invo } void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers (b/173104084). - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); GenUnsafeCas(invoke, DataType::Type::kReference, codegen_); } @@ -4351,7 +4351,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke, LocationFrom(target.object), method, ArtField::DeclaringClassOffset().Int32Value(), - kCompilerReadBarrierOption); + gCompilerReadBarrierOption); } } } else { @@ -4403,7 +4403,7 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) { } // Add a temporary for offset. - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields. // To preserve the offset value across the non-Baker read barrier slow path // for loading the declaring class, use a fixed callee-save register. 
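The GenerateCompareAndSet/GenUnsafeCas code touched above emits a load-exclusive/store-exclusive retry loop with a separate cmp_failure target when the loaded value does not match the expected one. A standalone sketch of that loop shape, using std::atomic in place of the emitted LDREX/STREX pair:

#include <atomic>
#include <cstdint>

bool CompareAndSet(std::atomic<int32_t>* field, int32_t expected, int32_t new_value) {
  while (true) {
    int32_t old_value = field->load(std::memory_order_relaxed);  // ~ load-exclusive
    if (old_value != expected) {
      return false;                                              // ~ cmp_failure path
    }
    if (field->compare_exchange_weak(old_value, new_value,
                                     std::memory_order_seq_cst)) {  // ~ store-exclusive
      return true;                                                  // ~ exit_loop
    }
    // Store-exclusive failed (interference or spurious failure): retry from the load.
  }
}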
@@ -4428,7 +4428,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke, return; } - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && invoke->GetType() == DataType::Type::kReference && invoke->GetIntrinsic() != Intrinsics::kVarHandleGet && invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) { @@ -4476,7 +4476,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, Location maybe_temp = Location::NoLocation(); Location maybe_temp2 = Location::NoLocation(); Location maybe_temp3 = Location::NoLocation(); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) { + if (gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) { // Reuse the offset temporary. maybe_temp = LocationFrom(target.offset); } else if (DataType::Is64BitType(type) && Use64BitExclusiveLoadStore(atomic, codegen)) { @@ -4590,7 +4590,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke, HInstruction* arg = invoke->InputAt(number_of_arguments - 1u); bool has_reverse_bytes_slow_path = (expected_coordinates_count == 2u) && - !(arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern()); + !IsZeroBitPattern(arg); if (Use64BitExclusiveLoadStore(atomic, codegen)) { // We need 4 temporaries in the byte array view slow path. Otherwise, we need // 2 or 3 temporaries for GenerateIntrinsicSet() depending on the value type. @@ -4699,7 +4699,7 @@ static void GenerateVarHandleSet(HInvoke* invoke, vixl32::Register temp = target.offset; vixl32::Register card = temps.Acquire(); vixl32::Register value_reg = RegisterFrom(value); - codegen->MarkGCCard(temp, card, target.object, value_reg, /*value_can_be_null=*/ true); + codegen->MarkGCCard(temp, card, target.object, value_reg, /* emit_null_check= */ true); } if (slow_path != nullptr) { @@ -4749,7 +4749,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo uint32_t number_of_arguments = invoke->GetNumberOfArguments(); DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u); - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && value_type == DataType::Type::kReference) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field. This breaks the read barriers @@ -4763,7 +4763,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo LocationSummary* locations = CreateVarHandleCommonLocations(invoke); - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { // We need callee-save registers for both the class object and offset instead of // the temporaries reserved in CreateVarHandleCommonLocations(). static_assert(POPCOUNT(kArmCalleeSaveRefSpills) >= 2u); @@ -4799,7 +4799,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo locations->AddRegisterTemps(2u); } } - if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + if (gUseReadBarrier && value_type == DataType::Type::kReference) { // Add a temporary for store result, also used for the `old_value_temp` in slow path. 
locations->AddTemp(Location::RequiresRegister()); } @@ -4930,7 +4930,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, vixl32::Label* exit_loop = &exit_loop_label; vixl32::Label* cmp_failure = &exit_loop_label; - if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + if (gUseReadBarrier && value_type == DataType::Type::kReference) { // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked // reloaded old value for subsequent CAS in the slow path. This must not clobber `old_value`. vixl32::Register old_value_temp = return_success ? RegisterFrom(out) : store_result; @@ -5086,7 +5086,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, return; } - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && invoke->GetType() == DataType::Type::kReference) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field, thus seeing the new value @@ -5107,7 +5107,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, // Add temps needed to do the GenerateGetAndUpdate() with core registers. size_t temps_needed = (value_type == DataType::Type::kFloat64) ? 5u : 3u; locations->AddRegisterTemps(temps_needed - locations->GetTempCount()); - } else if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + } else if ((gUseReadBarrier && !kUseBakerReadBarrier) && value_type == DataType::Type::kReference) { // We need to preserve the declaring class (if present) and offset for read barrier // slow paths, so we must use a separate temporary for the exclusive store result. @@ -5213,7 +5213,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, if (byte_swap) { GenerateReverseBytes(assembler, DataType::Type::kInt32, arg, arg); } - } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + } else if (gUseReadBarrier && value_type == DataType::Type::kReference) { if (kUseBakerReadBarrier) { // Load the old value initially to a temporary register. // We shall move it to `out` later with a read barrier. @@ -5296,7 +5296,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, } else { __ Vmov(SRegisterFrom(out), RegisterFrom(old_value)); } - } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + } else if (gUseReadBarrier && value_type == DataType::Type::kReference) { if (kUseBakerReadBarrier) { codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(RegisterFrom(out), RegisterFrom(old_value)); @@ -5517,7 +5517,7 @@ void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in) // Byte order check. For native byte order return to the main path. if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet) { HInstruction* arg = invoke->InputAt(invoke->GetNumberOfArguments() - 1u); - if (arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern()) { + if (IsZeroBitPattern(arg)) { // There is no reason to differentiate between native byte order and byte-swap // for setting a zero bit pattern. Just return to the main path. __ B(GetNativeByteOrderLabel()); @@ -5549,69 +5549,9 @@ void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in) __ B(GetExitLabel()); } -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. 
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongDivideUnsigned) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToFloat) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToHalf) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Floor) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Ceil) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Rint) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Greater) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16GreaterEquals) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Less) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16LessEquals) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Compare) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Min) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Max) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMultiplyHigh) - -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendObject); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendString); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharSequence); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharArray); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendBoolean); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendChar); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendInt); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendLong); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendFloat); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendDouble); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString); - -UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyByte); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyInt); - -// 1.8. -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFmaDouble) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFmaFloat) - -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject) - -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvokeExact) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvoke) - -// OpenJDK 11 -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeCASLong) // High register pressure. 
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetObject) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeCompareAndSetLong) +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARMVIXL, Name) +UNIMPLEMENTED_INTRINSIC_LIST_ARM(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED UNREACHABLE_INTRINSICS(ARMVIXL) diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h index 3103cec8f0..54475bcc7e 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.h +++ b/compiler/optimizing/intrinsics_arm_vixl.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_ #define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_ +#include "base/macros.h" #include "intrinsics.h" #include "utils/arm/assembler_arm_vixl.h" -namespace art { +namespace art HIDDEN { namespace arm { diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h index 19f5e332a8..13cabdafed 100644 --- a/compiler/optimizing/intrinsics_utils.h +++ b/compiler/optimizing/intrinsics_utils.h @@ -29,7 +29,7 @@ #include "utils/assembler.h" #include "utils/label.h" -namespace art { +namespace art HIDDEN { // Default slow-path for fallback (calling the managed code to handle the intrinsic) in an // intrinsified call. This will copy the arguments into the positions for a regular call. diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 7d90aae984..d2072201f8 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -38,7 +38,7 @@ #include "utils/x86/assembler_x86.h" #include "utils/x86/constants_x86.h" -namespace art { +namespace art HIDDEN { namespace x86 { @@ -75,7 +75,7 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode { public: explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); } @@ -1699,7 +1699,7 @@ static void GenUnsafeGet(HInvoke* invoke, case DataType::Type::kReference: { Register output = output_loc.AsRegister<Register>(); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { if (kUseBakerReadBarrier) { Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -1757,7 +1757,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, DataType::Type type, bool is_volatile) { - bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); + bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -2103,7 +2103,7 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) { static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, DataType::Type type, HInvoke* invoke) { - const bool can_call = kEmitCompilerReadBarrier && + const bool can_call = gUseReadBarrier && kUseBakerReadBarrier && IsUnsafeCASObject(invoke); LocationSummary* locations = @@ -2175,7 +2175,7 @@ void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invo void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { 
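Both backends above replace their long runs of UNIMPLEMENTED_INTRINSIC(...) lines with a shared per-architecture list macro stamped out through a local MARK_UNIMPLEMENTED macro. A minimal sketch of that list-macro ("X macro") pattern with an illustrative list; the real UNIMPLEMENTED_INTRINSIC_LIST_ARM/_ARM64 contents live in a shared header and are not reproduced here:

#include <cstdio>

// Illustrative list only; the real lists name every unimplemented intrinsic.
#define EXAMPLE_UNIMPLEMENTED_LIST(V) \
  V(StringStringIndexOf)              \
  V(StringBufferAppend)               \
  V(MethodHandleInvoke)

// Each backend provides its own per-name expansion and applies the list once.
#define MARK_UNIMPLEMENTED(Name) \
  void Visit##Name() { std::puts("intrinsic " #Name ": not implemented in this backend"); }
EXAMPLE_UNIMPLEMENTED_LIST(MARK_UNIMPLEMENTED)
#undef MARK_UNIMPLEMENTED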
// The only supported read barrier implementation is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2304,7 +2304,7 @@ static void GenReferenceCAS(HInvoke* invoke, DCHECK_EQ(expected, EAX); DCHECK_NE(temp, temp2); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -2391,7 +2391,7 @@ static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codeg if (type == DataType::Type::kReference) { // The only read barrier implementation supporting the // UnsafeCASObject intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); Register temp = locations->GetTemp(0).AsRegister<Register>(); Register temp2 = locations->GetTemp(1).AsRegister<Register>(); @@ -2413,7 +2413,7 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { // The only read barrier implementation supporting the // UnsafeCASObject intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); GenCAS(DataType::Type::kReference, invoke, codegen_); } @@ -2443,7 +2443,7 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); GenCAS(DataType::Type::kReference, invoke, codegen_); } @@ -2843,7 +2843,7 @@ static void GenSystemArrayCopyEndAddress(X86Assembler* assembler, void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2875,7 +2875,7 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -2995,7 +2995,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // slow path. 
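The x86 hunks above consistently replace the compile-time constant kEmitCompilerReadBarrier with the runtime flag gUseReadBarrier, so one compiler binary can serve configurations with and without the concurrent-copying read barrier. A minimal, self-contained sketch of the resulting guard pattern; the flag names come from the hunks above, but the definitions here are stand-ins:

// Stand-in definitions for the sketch: the read-barrier choice is a runtime
// flag, the Baker flavour stays a compile-time constant.
bool gUseReadBarrier = true;                 // would be derived from the GC type at startup
constexpr bool kUseBakerReadBarrier = true;  // build-time configuration

// Intrinsic location builders bail out when a non-Baker read barrier is
// active, leaving the operation to the runtime implementation instead.
bool CanIntrinsifyReferenceCas() {
  if (gUseReadBarrier && !kUseBakerReadBarrier) {
    return false;
  }
  return true;
}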
if (!optimizations.GetSourceIsNonPrimitiveArray()) { - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); @@ -3022,7 +3022,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { if (length.Equals(Location::RegisterLocation(temp3))) { // When Baker read barriers are enabled, register `temp3`, // which in the present case contains the `length` parameter, @@ -3120,7 +3120,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); @@ -3151,7 +3151,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // Compute the base source address in `temp1`. GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // If it is needed (in the case of the fast-path loop), the base // destination address is computed later, as `temp2` is used for // intermediate computations. @@ -3259,7 +3259,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. - codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null= */ false); + codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* emit_null_check= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -3377,7 +3377,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) { SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Check self->GetWeakRefAccessEnabled(). ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>(); __ fs()->cmpl(Address::Absolute(offset), @@ -3400,7 +3400,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) { // Load the value from the field. 
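The ReferenceGetReferent change above keeps the same shape under the runtime flag: before reading the referent, the generated code checks whether weak-reference access is currently enabled for the thread and otherwise takes a slow path. A hedged, CPU-independent sketch of that control flow; the helper names below are illustrative, not the ART API:

// Illustrative declarations only; these are not the ART signatures.
struct Object;

bool IsWeakRefAccessEnabled();             // a flag the GC toggles per thread
Object* LoadReferentWithReadBarrier(Object* ref);
Object* GetReferentSlowPath(Object* ref);  // waits until the GC re-enables access

Object* GetReferent(Object* ref, bool use_read_barrier) {
  if (use_read_barrier && !IsWeakRefAccessEnabled()) {
    // During reference processing the referent must not be read directly.
    return GetReferentSlowPath(ref);
  }
  return LoadReferentWithReadBarrier(ref);
}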
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value(); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, out, obj.AsRegister<Register>(), @@ -3442,7 +3442,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) { NearLabel end, return_true, return_false; __ cmpl(out, other); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { DCHECK(kUseBakerReadBarrier); __ j(kEqual, &return_true); @@ -3781,7 +3781,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke, Location::RegisterLocation(temp), Address(temp, declaring_class_offset), /* fixup_label= */ nullptr, - kCompilerReadBarrierOption); + gCompilerReadBarrierOption); return temp; } @@ -3794,7 +3794,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke, static void CreateVarHandleGetLocations(HInvoke* invoke) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -3836,7 +3836,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke) { static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -3860,7 +3860,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) { Address field_addr(ref, offset, TIMES_1, 0); // Load the value from the field - if (type == DataType::Type::kReference && kCompilerReadBarrierOption == kWithReadBarrier) { + if (type == DataType::Type::kReference && gCompilerReadBarrierOption == kWithReadBarrier) { codegen->GenerateReferenceLoadWithBakerReadBarrier( invoke, out, ref, field_addr, /* needs_null_check= */ false); } else if (type == DataType::Type::kInt64 && @@ -3917,7 +3917,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) { static void CreateVarHandleSetLocations(HInvoke* invoke) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -3963,7 +3963,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke) { case DataType::Type::kInt64: // We only handle constant non-atomic int64 values. DCHECK(value->IsConstant()); - locations->SetInAt(value_index, Location::ConstantLocation(value->AsConstant())); + locations->SetInAt(value_index, Location::ConstantLocation(value)); break; case DataType::Type::kReference: locations->SetInAt(value_index, Location::RequiresRegister()); @@ -3990,7 +3990,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke) { static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. 
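Loads of the declaring class in GenerateVarHandleFieldReference above now pass gCompilerReadBarrierOption instead of the old constant. A sketch of how such a per-compile option can be derived from the runtime flag; the enumerator names follow the ones used in the hunk, but the helper itself is hypothetical:

// Sketch only: a single option value that load helpers can branch on,
// computed once from the runtime read-barrier flag.
enum ReadBarrierOption {
  kWithReadBarrier,     // emit the read-barrier path for reference loads
  kWithoutReadBarrier,  // plain loads are sufficient
};

inline ReadBarrierOption CompilerReadBarrierOption(bool use_read_barrier) {
  return use_read_barrier ? kWithReadBarrier : kWithoutReadBarrier;
}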
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4041,13 +4041,16 @@ static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) { InstructionCodeGeneratorX86* instr_codegen = down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor()); // Store the value to the field - instr_codegen->HandleFieldSet(invoke, - value_index, - value_type, - Address(reference, offset, TIMES_1, 0), - reference, - is_volatile, - /* value_can_be_null */ true); + instr_codegen->HandleFieldSet( + invoke, + value_index, + value_type, + Address(reference, offset, TIMES_1, 0), + reference, + is_volatile, + /* value_can_be_null */ true, + // Value can be null, and this write barrier is not being relied on for other sets. + WriteBarrierKind::kEmitWithNullCheck); __ Bind(slow_path->GetExitLabel()); } @@ -4087,7 +4090,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleSetOpaque(HInvoke* invoke) { static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -4135,7 +4138,7 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) { static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4194,7 +4197,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX); break; case DataType::Type::kReference: { - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -4208,7 +4211,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege &temp2); } codegen->MarkGCCard( - temp, temp2, reference, value.AsRegister<Register>(), /* value_can_be_null= */ false); + temp, temp2, reference, value.AsRegister<Register>(), /* emit_null_check= */ false); if (kPoisonHeapReferences) { __ movl(temp, value.AsRegister<Register>()); __ PoisonHeapReference(temp); @@ -4258,7 +4261,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -4322,7 +4325,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) { static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. 
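HandleFieldSet above now takes an explicit write-barrier kind, and the VarHandle set path passes WriteBarrierKind::kEmitWithNullCheck because it knows nothing about the stored value. Only that enumerator is taken from the hunk; the other names and the helper below are an illustrative sketch of the shape such a parameter implies:

// Sketch of a write-barrier kind as suggested by the new parameter.
enum class WriteBarrierKind {
  kEmitWithNullCheck,  // emit the card mark, guarded by a null check on the value
  kEmitNoNullCheck,    // emit it unconditionally (value proven non-null)
  kDontEmit,           // another store to the same object already keeps the card dirty
};

// A call site with no elimination information picks the conservative kind.
inline WriteBarrierKind KindForUnknownValue() {
  return WriteBarrierKind::kEmitWithNullCheck;
}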
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4441,7 +4444,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -4490,7 +4493,7 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) { static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4591,7 +4594,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -4659,7 +4662,7 @@ static void GenerateBitwiseOp(HInvoke* invoke, static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. 
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4829,64 +4832,9 @@ void IntrinsicLocationsBuilderX86::VisitMathFmaFloat(HInvoke* invoke) { } } -UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite) -UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite) -UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit) -UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit) -UNIMPLEMENTED_INTRINSIC(X86, LongDivideUnsigned) -UNIMPLEMENTED_INTRINSIC(X86, CRC32Update) -UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes) -UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer) -UNIMPLEMENTED_INTRINSIC(X86, FP16ToFloat) -UNIMPLEMENTED_INTRINSIC(X86, FP16ToHalf) -UNIMPLEMENTED_INTRINSIC(X86, FP16Floor) -UNIMPLEMENTED_INTRINSIC(X86, FP16Ceil) -UNIMPLEMENTED_INTRINSIC(X86, FP16Rint) -UNIMPLEMENTED_INTRINSIC(X86, FP16Greater) -UNIMPLEMENTED_INTRINSIC(X86, FP16GreaterEquals) -UNIMPLEMENTED_INTRINSIC(X86, FP16Less) -UNIMPLEMENTED_INTRINSIC(X86, FP16LessEquals) -UNIMPLEMENTED_INTRINSIC(X86, FP16Compare) -UNIMPLEMENTED_INTRINSIC(X86, FP16Min) -UNIMPLEMENTED_INTRINSIC(X86, FP16Max) -UNIMPLEMENTED_INTRINSIC(X86, MathMultiplyHigh) - -UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf); -UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter); -UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend); -UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength); -UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendObject); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendString); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharSequence); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharArray); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendBoolean); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendChar); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendInt); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendLong); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendFloat); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendDouble); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString); - -// 1.8. 
- -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject) - -UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvokeExact) -UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvoke) - -// OpenJDK 11 -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetObject) +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86, Name) +UNIMPLEMENTED_INTRINSIC_LIST_X86(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED UNREACHABLE_INTRINSICS(X86) diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index ae150dad43..77c236d244 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_H_ #define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_H_ +#include "base/macros.h" #include "intrinsics.h" -namespace art { +namespace art HIDDEN { class ArenaAllocator; class HInvokeStaticOrDirect; diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 3c31374f67..9d0d5f155e 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -36,7 +36,7 @@ #include "utils/x86_64/assembler_x86_64.h" #include "utils/x86_64/constants_x86_64.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { @@ -71,7 +71,7 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode { public: explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); } @@ -836,7 +836,7 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) { void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -887,7 +887,7 @@ static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler, void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86_64Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1002,7 +1002,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // slow path. bool did_unpoison = false; - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = dest->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false); @@ -1034,7 +1034,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetDestinationIsNonPrimitiveArray()) { // Bail out if the destination is not a non primitive array. 
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ TMP = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false); @@ -1055,7 +1055,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetSourceIsNonPrimitiveArray()) { // Bail out if the source is not a non primitive array. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // For the same reason given earlier, `temp1` is not trashed by the // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. // /* HeapReference<Class> */ TMP = temp2->component_type_ @@ -1081,7 +1081,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (optimizations.GetDestinationIsTypedObjectArray()) { NearLabel do_copy; __ j(kEqual, &do_copy); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false); @@ -1109,7 +1109,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); @@ -1141,7 +1141,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { GenSystemArrayCopyAddresses( GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // SystemArrayCopy implementation for Baker read barriers (see // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier): // @@ -1224,7 +1224,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. 
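The "one card marking" comment above captures a useful property of SystemArrayCopy: every element written lands in the same destination array, so a single dirty card for the destination after the copy loop is enough for the generational GC. A simplified sketch of that structure, with a hypothetical card-table helper rather than the x86-64 code generator:

#include <cstddef>

struct Object;

// Hypothetical helper: records in the card table that `holder` may now
// reference objects from a younger space.
void MarkGCCard(Object* holder);

void ReferenceArrayCopy(Object** src, Object** dst, Object* dst_holder, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    dst[i] = src[i];  // raw reference copy (type checks already done)
  }
  // Every element above went into the same destination array, so one card
  // mark after the loop is sufficient rather than one per element.
  MarkGCCard(dst_holder);
}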
- codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null= */ false); + codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* emit_null_check= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -1888,7 +1888,7 @@ static void GenUnsafeGet(HInvoke* invoke, break; case DataType::Type::kReference: { - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { if (kUseBakerReadBarrier) { Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -1930,7 +1930,7 @@ static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) { } static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); + bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -2230,7 +2230,7 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) static void CreateUnsafeCASLocations(ArenaAllocator* allocator, DataType::Type type, HInvoke* invoke) { - const bool can_call = kEmitCompilerReadBarrier && + const bool can_call = gUseReadBarrier && kUseBakerReadBarrier && IsUnsafeCASObject(invoke); LocationSummary* locations = @@ -2253,7 +2253,7 @@ static void CreateUnsafeCASLocations(ArenaAllocator* allocator, // Need two temporaries for MarkGCCard. locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. locations->AddTemp(Location::RequiresRegister()); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier. DCHECK(kUseBakerReadBarrier); locations->AddTemp(Location::RequiresRegister()); @@ -2298,7 +2298,7 @@ void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* i void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2438,7 +2438,7 @@ static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen, CpuRegister temp3, bool is_cmpxchg) { // The only supported read barrier implementation is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); @@ -2447,7 +2447,7 @@ static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen, codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null); Address field_addr(base, offset, TIMES_1, 0); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. 
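The to-space comment above is the key subtlety for a reference compare-and-set under a copying collector: the expected value held by the mutator is a to-space reference, so if the field still contains the from-space address of the same object, a raw bitwise compare would fail even though the references are logically equal. A schematic sketch of the ordering, with invented helper names:

#include <atomic>

struct Object;

// Invented helper for the sketch: returns the to-space copy and, as a side
// effect, heals the field so that it also holds a to-space pointer.
Object* ReadBarrierHealField(std::atomic<Object*>* field);

bool ReferenceCompareAndSet(std::atomic<Object*>* field,
                            Object* expected,  // always a to-space reference
                            Object* desired) {
  // Without this step the field could still contain the from-space address of
  // `expected`, and the bitwise compare below would fail incorrectly.
  ReadBarrierHealField(field);
  return field->compare_exchange_strong(expected, desired);
}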
codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -2556,7 +2556,7 @@ static void GenCompareAndSetOrExchange(CodeGeneratorX86_64* codegen, CpuRegister new_value_reg = new_value.AsRegister<CpuRegister>(); CpuRegister temp1 = locations->GetTemp(temp1_index).AsRegister<CpuRegister>(); CpuRegister temp2 = locations->GetTemp(temp2_index).AsRegister<CpuRegister>(); - CpuRegister temp3 = kEmitCompilerReadBarrier + CpuRegister temp3 = gUseReadBarrier ? locations->GetTemp(temp3_index).AsRegister<CpuRegister>() : CpuRegister(kNoRegister); DCHECK(RegsAreAllDifferent({base, offset, temp1, temp2, temp3})); @@ -2624,7 +2624,7 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invo void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); GenCAS(DataType::Type::kReference, invoke, codegen_); } @@ -3128,7 +3128,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) { SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); codegen_->AddSlowPath(slow_path); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Check self->GetWeakRefAccessEnabled(). ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>(); __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true), @@ -3150,7 +3150,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) { // Load the value from the field. uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value(); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, out, obj.AsRegister<CpuRegister>(), @@ -3191,7 +3191,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) { __ cmpl(out, other); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { DCHECK(kUseBakerReadBarrier); NearLabel calculate_result; @@ -3771,7 +3771,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke, Location::RegisterLocation(target.object), Address(method, ArtField::DeclaringClassOffset()), /*fixup_label=*/ nullptr, - kCompilerReadBarrierOption); + gCompilerReadBarrierOption); } } } else { @@ -3790,7 +3790,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke, static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return false; } @@ -3876,7 +3876,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, Location out = locations->Out(); if (type == DataType::Type::kReference) { - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { DCHECK(kUseBakerReadBarrier); codegen->GenerateReferenceLoadWithBakerReadBarrier( invoke, out, CpuRegister(target.object), src, /* needs_null_check= */ false); @@ -3985,16 +3985,19 @@ static void GenerateVarHandleSet(HInvoke* invoke, Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0); // Store the value to the field. 
- codegen->GetInstructionCodegen()->HandleFieldSet(invoke, - value_index, - last_temp_index, - value_type, - dst, - CpuRegister(target.object), - is_volatile, - is_atomic, - /*value_can_be_null=*/ true, - byte_swap); + codegen->GetInstructionCodegen()->HandleFieldSet( + invoke, + value_index, + last_temp_index, + value_type, + dst, + CpuRegister(target.object), + is_volatile, + is_atomic, + /*value_can_be_null=*/true, + byte_swap, + // Value can be null, and this write barrier is not being relied on for other sets. + WriteBarrierKind::kEmitWithNullCheck); // setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that. @@ -4070,7 +4073,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) { // Need two temporaries for MarkGCCard. locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier. DCHECK(kUseBakerReadBarrier); locations->AddTemp(Location::RequiresRegister()); @@ -4085,7 +4088,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86_64* codegen, bool is_cmpxchg, bool byte_swap = false) { - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86_64Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4218,7 +4221,7 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) { // Need two temporaries for MarkGCCard. locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Need a third temporary for GenerateReferenceLoadWithBakerReadBarrier. 
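Several VarHandle location builders above reserve an extra temporary only when gUseReadBarrier is set, because GenerateReferenceLoadWithBakerReadBarrier needs it. A stripped-down sketch of that register-budgeting pattern; the LocationSummary-like type is invented:

// Invented stand-in for a LocationSummary: it only counts requested temporaries.
struct Locations {
  int temps = 0;
  void AddTemp() { ++temps; }
};

void AddGetAndSetTemps(Locations* locations, bool use_read_barrier) {
  // Two temporaries are always needed for the card mark.
  locations->AddTemp();
  locations->AddTemp();
  if (use_read_barrier) {
    // A third one is only needed for the Baker read-barrier load that heals
    // the field before the exchange.
    locations->AddTemp();
  }
}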
DCHECK(kUseBakerReadBarrier); locations->AddTemp(Location::RequiresRegister()); @@ -4267,7 +4270,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CpuRegister temp2 = locations->GetTemp(temp_count - 2).AsRegister<CpuRegister>(); CpuRegister valreg = value.AsRegister<CpuRegister>(); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { codegen->GenerateReferenceLoadWithBakerReadBarrier( invoke, locations->GetTemp(temp_count - 3), @@ -4278,7 +4281,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, &temp1, &temp2); } - codegen->MarkGCCard(temp1, temp2, ref, valreg, /*value_can_be_null=*/ false); + codegen->MarkGCCard(temp1, temp2, ref, valreg, /* emit_null_check= */ false); DCHECK_EQ(valreg, out.AsRegister<CpuRegister>()); if (kPoisonHeapReferences) { @@ -4647,7 +4650,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, bool need_any_store_barrier, bool need_any_any_barrier, bool byte_swap = false) { - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86_64Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4987,57 +4990,9 @@ void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen __ jmp(GetExitLabel()); } -UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update) -UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes) -UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateByteBuffer) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToFloat) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToHalf) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Floor) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Ceil) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Rint) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Greater) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16GreaterEquals) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Less) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16LessEquals) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Compare) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Min) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Max) - -UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf); -UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferAppend); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferLength); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendObject); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendString); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharSequence); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharArray); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendBoolean); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendChar); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendInt); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendLong); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendFloat); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendDouble); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString); - -// 1.8. 
- -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject) - -UNIMPLEMENTED_INTRINSIC(X86_64, MethodHandleInvokeExact) -UNIMPLEMENTED_INTRINSIC(X86_64, MethodHandleInvoke) - -// OpenJDK 11 -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetObject) +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86_64, Name) +UNIMPLEMENTED_INTRINSIC_LIST_X86_64(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED UNREACHABLE_INTRINSICS(X86_64) diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index 199cfede1a..59fe815a94 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_ #define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_ +#include "base/macros.h" #include "intrinsics.h" -namespace art { +namespace art HIDDEN { class ArenaAllocator; class HInvokeStaticOrDirect; diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index 0edb23b857..0c791b640d 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -18,7 +18,7 @@ #include "side_effects_analysis.h" -namespace art { +namespace art HIDDEN { static bool IsPhiOf(HInstruction* instruction, HBasicBlock* block) { return instruction->IsPhi() && instruction->GetBlock() == block; diff --git a/compiler/optimizing/licm.h b/compiler/optimizing/licm.h index 9cafddb05a..1a86b6eb9f 100644 --- a/compiler/optimizing/licm.h +++ b/compiler/optimizing/licm.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_LICM_H_ #define ART_COMPILER_OPTIMIZING_LICM_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class SideEffectsAnalysis; diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc index adc3cabe87..f8481099f4 100644 --- a/compiler/optimizing/licm_test.cc +++ b/compiler/optimizing/licm_test.cc @@ -17,12 +17,13 @@ #include "licm.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "side_effects_analysis.h" -namespace art { +namespace art HIDDEN { /** * Fixture class for the LICM tests. 
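The UNIMPLEMENTED_INTRINSIC_LIST_* rewrites above (ARM, x86 and x86-64 alike) replace long hand-maintained lists with a single X-macro expansion, so a newly added intrinsic only has to be listed in one place per backend. A minimal, generic sketch of the pattern with hypothetical names:

// Hypothetical list macro: each backend would own one of these.
#define MY_UNIMPLEMENTED_INTRINSIC_LIST(V) \
  V(MathFoo)                               \
  V(StringBar)

// Generates one stub per entry; in the real code UNIMPLEMENTED_INTRINSIC(arch, Name)
// plays this role.
#define DECLARE_UNIMPLEMENTED(Name) \
  void Visit##Name() { /* fall back to the runtime implementation */ }

MY_UNIMPLEMENTED_INTRINSIC_LIST(DECLARE_UNIMPLEMENTED)
#undef DECLARE_UNIMPLEMENTED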
diff --git a/compiler/optimizing/linear_order.cc b/compiler/optimizing/linear_order.cc index 58e00a810d..25ca866b2c 100644 --- a/compiler/optimizing/linear_order.cc +++ b/compiler/optimizing/linear_order.cc @@ -19,7 +19,7 @@ #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" -namespace art { +namespace art HIDDEN { static bool InSameLoop(HLoopInformation* first_loop, HLoopInformation* second_loop) { return first_loop == second_loop; diff --git a/compiler/optimizing/linear_order.h b/compiler/optimizing/linear_order.h index 151db001e1..75e75048a3 100644 --- a/compiler/optimizing/linear_order.h +++ b/compiler/optimizing/linear_order.h @@ -19,9 +19,10 @@ #include <type_traits> +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { void LinearizeGraphInternal(const HGraph* graph, ArrayRef<HBasicBlock*> linear_order); diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index d56ae11ca9..01daa23511 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -17,6 +17,7 @@ #include <fstream> #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "code_generator.h" #include "dex/dex_file.h" @@ -28,9 +29,9 @@ #include "pretty_printer.h" #include "ssa_liveness_analysis.h" -namespace art { +namespace art HIDDEN { -class LinearizeTest : public OptimizingUnitTest { +class LinearizeTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: template <size_t number_of_blocks> void TestCode(const std::vector<uint16_t>& data, diff --git a/compiler/optimizing/live_interval_test.cc b/compiler/optimizing/live_interval_test.cc index c60386d7b7..b5d1336d4a 100644 --- a/compiler/optimizing/live_interval_test.cc +++ b/compiler/optimizing/live_interval_test.cc @@ -15,12 +15,13 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "optimizing_unit_test.h" #include "ssa_liveness_analysis.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { TEST(LiveInterval, GetStart) { ArenaPoolAndAllocator pool; diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index bb8a4dc08e..fb1a23eef4 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "code_generator.h" #include "dex/dex_file.h" @@ -25,9 +26,9 @@ #include "prepare_for_register_allocation.h" #include "ssa_liveness_analysis.h" -namespace art { +namespace art HIDDEN { -class LiveRangesTest : public OptimizingUnitTest { +class LiveRangesTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: HGraph* BuildGraph(const std::vector<uint16_t>& data); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index ba3787e9be..0b421cf9e6 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "code_generator.h" #include "dex/dex_file.h" @@ -25,9 +26,9 @@ #include "prepare_for_register_allocation.h" #include "ssa_liveness_analysis.h" -namespace art { +namespace art HIDDEN { -class LivenessTest : public OptimizingUnitTest { +class LivenessTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void TestCode(const std::vector<uint16_t>& data, 
const char* expected); }; diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc index 3fe42aff2e..f1c50ac03c 100644 --- a/compiler/optimizing/load_store_analysis.cc +++ b/compiler/optimizing/load_store_analysis.cc @@ -19,7 +19,7 @@ #include "base/scoped_arena_allocator.h" #include "optimizing/escape.h" -namespace art { +namespace art HIDDEN { // A cap for the number of heap locations to prevent pathological time/space consumption. // The number of heap locations for most of the methods stays below this threshold. @@ -283,14 +283,6 @@ bool LoadStoreAnalysis::Run() { heap_location_collector_.CleanUp(); return false; } - if (heap_location_collector_.HasVolatile() || heap_location_collector_.HasMonitorOps()) { - // Don't do load/store elimination if the method has volatile field accesses or - // monitor operations, for now. - // TODO: do it right. - heap_location_collector_.CleanUp(); - return false; - } - heap_location_collector_.BuildAliasingMatrix(); heap_location_collector_.DumpReferenceStats(stats_); return true; diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h index 4975bae2a2..c46a5b9cc1 100644 --- a/compiler/optimizing/load_store_analysis.h +++ b/compiler/optimizing/load_store_analysis.h @@ -20,6 +20,7 @@ #include "base/arena_allocator.h" #include "base/arena_bit_vector.h" #include "base/bit_vector-inl.h" +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "base/stl_util.h" @@ -28,7 +29,7 @@ #include "nodes.h" #include "optimizing/optimizing_compiler_stats.h" -namespace art { +namespace art HIDDEN { enum class LoadStoreAnalysisType { kBasic, @@ -170,14 +171,16 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> { size_t offset, HInstruction* index, size_t vector_length, - int16_t declaring_class_def_index) + int16_t declaring_class_def_index, + bool is_vec_op) : ref_info_(ref_info), type_(DataType::ToSigned(type)), offset_(offset), index_(index), vector_length_(vector_length), declaring_class_def_index_(declaring_class_def_index), - has_aliased_locations_(false) { + has_aliased_locations_(false), + is_vec_op_(is_vec_op) { DCHECK(ref_info != nullptr); DCHECK((offset == kInvalidFieldOffset && index != nullptr) || (offset != kInvalidFieldOffset && index == nullptr)); @@ -188,6 +191,7 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> { size_t GetOffset() const { return offset_; } HInstruction* GetIndex() const { return index_; } size_t GetVectorLength() const { return vector_length_; } + bool IsVecOp() const { return is_vec_op_; } // Returns the definition of declaring class' dex index. // It's kDeclaringClassDefIndexForArrays for an array element. @@ -226,11 +230,12 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> { // Declaring class's def's dex index. // Invalid when this HeapLocation is not field access. const int16_t declaring_class_def_index_; - // Has aliased heap locations in the method, due to either the // reference is aliased or the array element is aliased via different // index names. bool has_aliased_locations_; + // Whether this HeapLocation represents a vector operation. 
+ bool is_vec_op_; DISALLOW_COPY_AND_ASSIGN(HeapLocation); }; @@ -253,8 +258,6 @@ class HeapLocationCollector : public HGraphVisitor { heap_locations_(allocator->Adapter(kArenaAllocLSA)), aliasing_matrix_(allocator, kInitialAliasingMatrixBitVectorSize, true, kArenaAllocLSA), has_heap_stores_(false), - has_volatile_(false), - has_monitor_operations_(false), lse_type_(lse_type) { aliasing_matrix_.ClearAllBits(); } @@ -319,7 +322,8 @@ class HeapLocationCollector : public HGraphVisitor { field->GetFieldOffset().SizeValue(), nullptr, HeapLocation::kScalar, - field->GetDeclaringClassDefIndex()); + field->GetDeclaringClassDefIndex(), + /*is_vec_op=*/false); } size_t GetArrayHeapLocation(HInstruction* instruction) const { @@ -328,10 +332,10 @@ class HeapLocationCollector : public HGraphVisitor { HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetType(); size_t vector_length = HeapLocation::kScalar; + const bool is_vec_op = instruction->IsVecStore() || instruction->IsVecLoad(); if (instruction->IsArraySet()) { type = instruction->AsArraySet()->GetComponentType(); - } else if (instruction->IsVecStore() || - instruction->IsVecLoad()) { + } else if (is_vec_op) { HVecOperation* vec_op = instruction->AsVecOperation(); type = vec_op->GetPackedType(); vector_length = vec_op->GetVectorLength(); @@ -343,21 +347,14 @@ class HeapLocationCollector : public HGraphVisitor { HeapLocation::kInvalidFieldOffset, index, vector_length, - HeapLocation::kDeclaringClassDefIndexForArrays); + HeapLocation::kDeclaringClassDefIndexForArrays, + is_vec_op); } bool HasHeapStores() const { return has_heap_stores_; } - bool HasVolatile() const { - return has_volatile_; - } - - bool HasMonitorOps() const { - return has_monitor_operations_; - } - // Find and return the heap location index in heap_locations_. // NOTE: When heap locations are created, potentially aliasing/overlapping // accesses are given different indexes. 
This find function also @@ -373,7 +370,8 @@ class HeapLocationCollector : public HGraphVisitor { size_t offset, HInstruction* index, size_t vector_length, - int16_t declaring_class_def_index) const { + int16_t declaring_class_def_index, + bool is_vec_op) const { DataType::Type lookup_type = DataType::ToSigned(type); for (size_t i = 0; i < heap_locations_.size(); i++) { HeapLocation* loc = heap_locations_[i]; @@ -382,7 +380,8 @@ class HeapLocationCollector : public HGraphVisitor { loc->GetOffset() == offset && loc->GetIndex() == index && loc->GetVectorLength() == vector_length && - loc->GetDeclaringClassDefIndex() == declaring_class_def_index) { + loc->GetDeclaringClassDefIndex() == declaring_class_def_index && + loc->IsVecOp() == is_vec_op) { return i; } } @@ -527,22 +526,20 @@ class HeapLocationCollector : public HGraphVisitor { size_t offset, HInstruction* index, size_t vector_length, - int16_t declaring_class_def_index) { + int16_t declaring_class_def_index, + bool is_vec_op) { HInstruction* original_ref = HuntForOriginalReference(ref); ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref); size_t heap_location_idx = FindHeapLocationIndex( - ref_info, type, offset, index, vector_length, declaring_class_def_index); + ref_info, type, offset, index, vector_length, declaring_class_def_index, is_vec_op); if (heap_location_idx == kHeapLocationNotFound) { - HeapLocation* heap_loc = new (allocator_) - HeapLocation(ref_info, type, offset, index, vector_length, declaring_class_def_index); + HeapLocation* heap_loc = new (allocator_) HeapLocation( + ref_info, type, offset, index, vector_length, declaring_class_def_index, is_vec_op); heap_locations_.push_back(heap_loc); } } void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { - if (field_info.IsVolatile()) { - has_volatile_ = true; - } DataType::Type type = field_info.GetFieldType(); const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); const size_t offset = field_info.GetFieldOffset().SizeValue(); @@ -551,19 +548,22 @@ class HeapLocationCollector : public HGraphVisitor { offset, nullptr, HeapLocation::kScalar, - declaring_class_def_index); + declaring_class_def_index, + /*is_vec_op=*/false); } void VisitArrayAccess(HInstruction* array, HInstruction* index, DataType::Type type, - size_t vector_length) { + size_t vector_length, + bool is_vec_op) { MaybeCreateHeapLocation(array, type, HeapLocation::kInvalidFieldOffset, index, vector_length, - HeapLocation::kDeclaringClassDefIndexForArrays); + HeapLocation::kDeclaringClassDefIndexForArrays, + is_vec_op); } void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override { @@ -597,7 +597,7 @@ class HeapLocationCollector : public HGraphVisitor { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetType(); - VisitArrayAccess(array, index, type, HeapLocation::kScalar); + VisitArrayAccess(array, index, type, HeapLocation::kScalar, /*is_vec_op=*/false); CreateReferenceInfoForReferenceType(instruction); } @@ -605,7 +605,7 @@ class HeapLocationCollector : public HGraphVisitor { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetComponentType(); - VisitArrayAccess(array, index, type, HeapLocation::kScalar); + VisitArrayAccess(array, index, type, HeapLocation::kScalar, /*is_vec_op=*/false); has_heap_stores_ = true; } @@ -613,7 +613,7 @@ class HeapLocationCollector : public 
HGraphVisitor { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetPackedType(); - VisitArrayAccess(array, index, type, instruction->GetVectorLength()); + VisitArrayAccess(array, index, type, instruction->GetVectorLength(), /*is_vec_op=*/true); CreateReferenceInfoForReferenceType(instruction); } @@ -621,7 +621,7 @@ class HeapLocationCollector : public HGraphVisitor { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetPackedType(); - VisitArrayAccess(array, index, type, instruction->GetVectorLength()); + VisitArrayAccess(array, index, type, instruction->GetVectorLength(), /*is_vec_op=*/true); has_heap_stores_ = true; } @@ -637,18 +637,12 @@ class HeapLocationCollector : public HGraphVisitor { CreateReferenceInfoForReferenceType(instruction); } - void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) override { - has_monitor_operations_ = true; - } - ScopedArenaAllocator* allocator_; ScopedArenaVector<ReferenceInfo*> ref_info_array_; // All references used for heap accesses. ScopedArenaVector<HeapLocation*> heap_locations_; // All heap locations. ArenaBitVector aliasing_matrix_; // aliasing info between each pair of locations. bool has_heap_stores_; // If there is no heap stores, LSE acts as GVN with better // alias analysis and won't be as effective. - bool has_volatile_; // If there are volatile field accesses. - bool has_monitor_operations_; // If there are monitor operations. LoadStoreAnalysisType lse_type_; DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector); diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc index 3c26c8d6ce..865febbd31 100644 --- a/compiler/optimizing/load_store_analysis_test.cc +++ b/compiler/optimizing/load_store_analysis_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/macros.h" #include "load_store_analysis.h" #include <array> @@ -36,7 +37,7 @@ #include "optimizing_unit_test.h" #include "scoped_thread_state_change.h" -namespace art { +namespace art HIDDEN { class LoadStoreAnalysisTest : public CommonCompilerTest, public OptimizingUnitTestHelper { public: @@ -117,12 +118,13 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { size_t field = HeapLocation::kInvalidFieldOffset; size_t vec = HeapLocation::kScalar; size_t class_def = HeapLocation::kDeclaringClassDefIndexForArrays; + const bool is_vec_op = false; size_t loc1 = heap_location_collector.FindHeapLocationIndex( - ref, type, field, c1, vec, class_def); + ref, type, field, c1, vec, class_def, is_vec_op); size_t loc2 = heap_location_collector.FindHeapLocationIndex( - ref, type, field, c2, vec, class_def); + ref, type, field, c2, vec, class_def, is_vec_op); size_t loc3 = heap_location_collector.FindHeapLocationIndex( - ref, type, field, index, vec, class_def); + ref, type, field, index, vec, class_def, is_vec_op); // must find this reference info for array in HeapLocationCollector. 
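The test above now threads is_vec_op through FindHeapLocationIndex, matching the new HeapLocation field: vector and scalar accesses are kept as distinct heap locations even when the other key fields coincide. A condensed sketch of the lookup key this amounts to, using simplified types rather than the ART class:

#include <cstddef>
#include <cstdint>
#include <vector>

struct HInstruction;

// Simplified key: the real HeapLocation also carries the reference info.
struct HeapLocationKey {
  int type;                     // signed data type
  std::size_t offset;           // field offset, or invalid for array accesses
  const HInstruction* index;    // array index, or nullptr for fields
  std::size_t vector_length;    // kScalar for non-vector accesses
  int16_t declaring_class_def;  // field's declaring class, or the array marker
  bool is_vec_op;               // NEW: vector and scalar accesses never match

  bool operator==(const HeapLocationKey& o) const {
    return type == o.type && offset == o.offset && index == o.index &&
           vector_length == o.vector_length &&
           declaring_class_def == o.declaring_class_def && is_vec_op == o.is_vec_op;
  }
};

// Linear search mirroring FindHeapLocationIndex; returns locations.size() if absent.
std::size_t FindHeapLocation(const std::vector<HeapLocationKey>& locations,
                             const HeapLocationKey& key) {
  for (std::size_t i = 0; i < locations.size(); ++i) {
    if (locations[i] == key) return i;
  }
  return locations.size();
}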
ASSERT_TRUE(ref != nullptr); // must find these heap locations; @@ -142,7 +144,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3)); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3)); - EXPECT_TRUE(CheckGraph(graph_)); + EXPECT_TRUE(CheckGraph()); } TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) { @@ -223,15 +225,14 @@ TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) { // accesses to different fields of the same object should not alias. ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); - EXPECT_TRUE(CheckGraph(graph_)); + EXPECT_TRUE(CheckGraph()); } TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) { CreateGraph(); - HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); - graph_->AddBlock(entry); - graph_->SetEntryBlock(entry); - graph_->BuildDominatorTree(); + AdjacencyListGraph blks( + SetupFromAdjacencyList("entry", "exit", {{"entry", "body"}, {"body", "exit"}})); + HBasicBlock* body = blks.Get("body"); HInstruction* array = new (GetAllocator()) HParameterValue( graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); @@ -261,23 +262,25 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) { HInstruction* arr_set8 = new (GetAllocator()) HArraySet(array, sub_neg1, c0, DataType::Type::kInt32, 0); - entry->AddInstruction(array); - entry->AddInstruction(index); - entry->AddInstruction(add0); - entry->AddInstruction(add1); - entry->AddInstruction(sub0); - entry->AddInstruction(sub1); - entry->AddInstruction(sub_neg1); - entry->AddInstruction(rev_sub1); - - entry->AddInstruction(arr_set1); // array[0] = c0 - entry->AddInstruction(arr_set2); // array[1] = c0 - entry->AddInstruction(arr_set3); // array[i+0] = c0 - entry->AddInstruction(arr_set4); // array[i+1] = c0 - entry->AddInstruction(arr_set5); // array[i-0] = c0 - entry->AddInstruction(arr_set6); // array[i-1] = c0 - entry->AddInstruction(arr_set7); // array[1-i] = c0 - entry->AddInstruction(arr_set8); // array[i-(-1)] = c0 + body->AddInstruction(array); + body->AddInstruction(index); + body->AddInstruction(add0); + body->AddInstruction(add1); + body->AddInstruction(sub0); + body->AddInstruction(sub1); + body->AddInstruction(sub_neg1); + body->AddInstruction(rev_sub1); + + body->AddInstruction(arr_set1); // array[0] = c0 + body->AddInstruction(arr_set2); // array[1] = c0 + body->AddInstruction(arr_set3); // array[i+0] = c0 + body->AddInstruction(arr_set4); // array[i+1] = c0 + body->AddInstruction(arr_set5); // array[i-0] = c0 + body->AddInstruction(arr_set6); // array[i-1] = c0 + body->AddInstruction(arr_set7); // array[1-i] = c0 + body->AddInstruction(arr_set8); // array[i-(-1)] = c0 + + body->AddInstruction(new (GetAllocator()) HReturnVoid()); ScopedArenaAllocator allocator(graph_->GetArenaStack()); LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kBasic); @@ -317,7 +320,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) { loc2 = heap_location_collector.GetArrayHeapLocation(arr_set8); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); - EXPECT_TRUE(CheckGraphSkipRefTypeInfoChecks(graph_)); + EXPECT_TRUE(CheckGraph()); } TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) { @@ -891,7 +894,8 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); 
call_left->AsInvoke()->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); @@ -1000,7 +1004,8 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape2) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); call_left->AsInvoke()->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); @@ -1123,7 +1128,8 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape3) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); call_left->AsInvoke()->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); @@ -1403,7 +1409,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacentNoPredicated) { {}, InvokeType::kStatic, {nullptr, 0}, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); call_left->AsInvoke()->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); @@ -1504,7 +1511,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacent) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); call_left->AsInvoke()->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); @@ -1615,7 +1623,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); call_left->AsInvoke()->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); @@ -1631,7 +1640,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, c0, nullptr, @@ -1800,7 +1810,8 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); call_left->AsInvoke()->SetRawInputAt(0, new_inst); high_left->AddInstruction(call_left); @@ -1856,7 +1867,8 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_low_left = new (GetAllocator()) HGoto(); call_low_left->AsInvoke()->SetRawInputAt(0, new_inst); low_left->AddInstruction(call_low_left); @@ -2013,7 +2025,8 @@ TEST_F(LoadStoreAnalysisTest, PartialPhiPropagation1) { {}, InvokeType::kStatic, {nullptr, 0}, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left_merge = new (GetAllocator()) HGoto(); 
left_phi->SetRawInputAt(0, obj_param); left_phi->SetRawInputAt(1, new_inst); diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 9b8f07e969..9cabb12a9f 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -319,7 +319,7 @@ * a hash map to the HeapLocationCollector. */ -namespace art { +namespace art HIDDEN { #define LSE_VLOG \ if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO) @@ -855,25 +855,6 @@ class LSEVisitor final : private HGraphDelegateVisitor { } } - // `instruction` is being removed. Try to see if the null check on it - // can be removed. This can happen if the same value is set in two branches - // but not in dominators. Such as: - // int[] a = foo(); - // if () { - // a[0] = 2; - // } else { - // a[0] = 2; - // } - // // a[0] can now be replaced with constant 2, and the null check on it can be removed. - void TryRemovingNullCheck(HInstruction* instruction) { - HInstruction* prev = instruction->GetPrevious(); - if ((prev != nullptr) && prev->IsNullCheck() && (prev == instruction->InputAt(0))) { - // Previous instruction is a null check for this instruction. Remove the null check. - prev->ReplaceWith(prev->InputAt(0)); - prev->GetBlock()->RemoveInstruction(prev); - } - } - HInstruction* GetDefaultValue(DataType::Type type) { switch (type) { case DataType::Type::kReference: @@ -993,13 +974,63 @@ class LSEVisitor final : private HGraphDelegateVisitor { << " but LSE should be the only source of predicated-ifield-gets!"; } + void HandleAcquireLoad(HInstruction* instruction) { + DCHECK((instruction->IsInstanceFieldGet() && instruction->AsInstanceFieldGet()->IsVolatile()) || + (instruction->IsStaticFieldGet() && instruction->AsStaticFieldGet()->IsVolatile()) || + (instruction->IsMonitorOperation() && instruction->AsMonitorOperation()->IsEnter())) + << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName(); + + // Acquire operations e.g. MONITOR_ENTER change the thread's view of the memory, so we must + // invalidate all current values. + ScopedArenaVector<ValueRecord>& heap_values = + heap_values_for_[instruction->GetBlock()->GetBlockId()]; + for (size_t i = 0u, size = heap_values.size(); i != size; ++i) { + KeepStores(heap_values[i].stored_by); + heap_values[i].stored_by = Value::PureUnknown(); + heap_values[i].value = Value::PartialUnknown(heap_values[i].value); + } + + // Note that there's no need to record the load as subsequent acquire loads shouldn't be + // eliminated either. + } + + void HandleReleaseStore(HInstruction* instruction) { + DCHECK((instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->IsVolatile()) || + (instruction->IsStaticFieldSet() && instruction->AsStaticFieldSet()->IsVolatile()) || + (instruction->IsMonitorOperation() && !instruction->AsMonitorOperation()->IsEnter())) + << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName(); + + // Release operations e.g. MONITOR_EXIT do not affect this thread's view of the memory, but + // they will push the modifications for other threads to see. Therefore, we must keep the + // stores but there's no need to clobber the value. 
+ ScopedArenaVector<ValueRecord>& heap_values = + heap_values_for_[instruction->GetBlock()->GetBlockId()]; + for (size_t i = 0u, size = heap_values.size(); i != size; ++i) { + KeepStores(heap_values[i].stored_by); + heap_values[i].stored_by = Value::PureUnknown(); + } + + // Note that there's no need to record the store as subsequent release store shouldn't be + // eliminated either. + } + void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override { + if (instruction->IsVolatile()) { + HandleAcquireLoad(instruction); + return; + } + HInstruction* object = instruction->InputAt(0); const FieldInfo& field = instruction->GetFieldInfo(); VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field)); } void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override { + if (instruction->IsVolatile()) { + HandleReleaseStore(instruction); + return; + } + HInstruction* object = instruction->InputAt(0); const FieldInfo& field = instruction->GetFieldInfo(); HInstruction* value = instruction->InputAt(1); @@ -1008,12 +1039,22 @@ class LSEVisitor final : private HGraphDelegateVisitor { } void VisitStaticFieldGet(HStaticFieldGet* instruction) override { + if (instruction->IsVolatile()) { + HandleAcquireLoad(instruction); + return; + } + HInstruction* cls = instruction->InputAt(0); const FieldInfo& field = instruction->GetFieldInfo(); VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field)); } void VisitStaticFieldSet(HStaticFieldSet* instruction) override { + if (instruction->IsVolatile()) { + HandleReleaseStore(instruction); + return; + } + HInstruction* cls = instruction->InputAt(0); const FieldInfo& field = instruction->GetFieldInfo(); HInstruction* value = instruction->InputAt(1); @@ -1021,6 +1062,14 @@ class LSEVisitor final : private HGraphDelegateVisitor { VisitSetLocation(instruction, idx, value); } + void VisitMonitorOperation(HMonitorOperation* monitor_op) override { + if (monitor_op->IsEnter()) { + HandleAcquireLoad(monitor_op); + } else { + HandleReleaseStore(monitor_op); + } + } + void VisitArrayGet(HArrayGet* instruction) override { VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction)); } @@ -1040,8 +1089,8 @@ class LSEVisitor final : private HGraphDelegateVisitor { } void VisitDeoptimize(HDeoptimize* instruction) override { - // If we are in a try catch, even singletons are observable. - const bool in_try_catch = instruction->GetBlock()->GetTryCatchInformation() != nullptr; + // If we are in a try, even singletons are observable. + const bool inside_a_try = instruction->GetBlock()->IsTryBlock(); HBasicBlock* block = instruction->GetBlock(); ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()]; for (size_t i = 0u, size = heap_values.size(); i != size; ++i) { @@ -1053,7 +1102,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { // for singletons that don't escape in the deoptimization environment. bool observable = true; ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); - if (!in_try_catch && info->IsSingleton()) { + if (!inside_a_try && info->IsSingleton()) { HInstruction* reference = info->GetReference(); // Finalizable objects always escape. 
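The HandleAcquireLoad()/HandleReleaseStore() split above follows standard acquire/release reasoning. As a plain C++ analogue (std::atomic only, not ART code) of why an acquire operation must forget previously known heap values while a release operation only needs to keep earlier stores alive:

#include <atomic>

struct Box { int x = 0; };

// Release side: the plain store to b->x must be kept alive -- a thread that later observes
// `ready` with acquire semantics is entitled to see it -- but the releasing store does not
// change this thread's own view of memory (HandleReleaseStore: KeepStores, values untouched).
void Publish(Box* b, std::atomic<bool>* ready) {
  b->x = 42;
  ready->store(true, std::memory_order_release);
}

// Acquire side: once the acquire load succeeds, writes made by the publisher become visible,
// so any value of b->x known before this point may be stale and must not be reused
// (HandleAcquireLoad: all heap values invalidated, and the load itself is never eliminated).
int Consume(Box* b, std::atomic<bool>* ready) {
  while (!ready->load(std::memory_order_acquire)) {
  }
  return b->x;  // must be a real load; it cannot be replaced by a previously cached value
}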
const bool finalizable_object = @@ -1099,10 +1148,8 @@ class LSEVisitor final : private HGraphDelegateVisitor { void HandleThrowingInstruction(HInstruction* instruction) { DCHECK(instruction->CanThrow()); - // If we are inside of a try catch, singletons can become visible since we may not exit the - // method. - HandleExit(instruction->GetBlock(), - instruction->GetBlock()->GetTryCatchInformation() != nullptr); + // If we are inside of a try, singletons can become visible since we may not exit the method. + HandleExit(instruction->GetBlock(), instruction->GetBlock()->IsTryBlock()); } void VisitMethodEntryHook(HMethodEntryHook* method_entry) override { @@ -1137,6 +1184,14 @@ class LSEVisitor final : private HGraphDelegateVisitor { } } + void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override { + HandleThrowingInstruction(load_method_handle); + } + + void VisitLoadMethodType(HLoadMethodType* load_method_type) override { + HandleThrowingInstruction(load_method_type); + } + void VisitStringBuilderAppend(HStringBuilderAppend* sb_append) override { HandleThrowingInstruction(sb_append); } @@ -1149,18 +1204,11 @@ class LSEVisitor final : private HGraphDelegateVisitor { HandleThrowingInstruction(check_cast); } - void VisitMonitorOperation(HMonitorOperation* monitor_op) override { - if (monitor_op->CanThrow()) { - HandleThrowingInstruction(monitor_op); - } - } - void HandleInvoke(HInstruction* instruction) { // If `instruction` can throw we have to presume all stores are visible. const bool can_throw = instruction->CanThrow(); - // If we are in a try catch, even singletons are observable. - const bool can_throw_in_try_catch = - can_throw && instruction->GetBlock()->GetTryCatchInformation() != nullptr; + // If we are in a try, even singletons are observable. + const bool can_throw_inside_a_try = can_throw && instruction->GetBlock()->IsTryBlock(); SideEffects side_effects = instruction->GetSideEffects(); ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; @@ -1186,7 +1234,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { return cohort.PrecedesBlock(blk); }); }; - if (!can_throw_in_try_catch && + if (!can_throw_inside_a_try && (ref_info->IsSingleton() || // partial and we aren't currently escaping and we haven't escaped yet. (ref_info->IsPartialSingleton() && partial_singleton_did_not_escape(ref_info, blk)))) { @@ -1235,8 +1283,8 @@ class LSEVisitor final : private HGraphDelegateVisitor { } void VisitNewInstance(HNewInstance* new_instance) override { - // If we are in a try catch, even singletons are observable. - const bool in_try_catch = new_instance->GetBlock()->GetTryCatchInformation() != nullptr; + // If we are in a try, even singletons are observable. + const bool inside_a_try = new_instance->GetBlock()->IsTryBlock(); ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance); if (ref_info == nullptr) { // new_instance isn't used for field accesses. No need to process it. @@ -1265,7 +1313,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass()); heap_values[i].stored_by = Value::PureUnknown(); } - } else if (in_try_catch || IsEscapingObject(info, block, i)) { + } else if (inside_a_try || IsEscapingObject(info, block, i)) { // Since NewInstance can throw, we presume all previous stores could be visible. 
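Several hunks above and below replace `GetTryCatchInformation() != nullptr` with `IsTryBlock()`. The following hypothetical helper summarizes the assumed distinction (TryCatchInformation is attached to catch blocks as well, while IsTryBlock() holds only for blocks covered by a TryBoundary); it is an interpretation of the rename from "try catch" to "try", not code from the patch:

// Sketch only: why the predicate changed for throwing instructions, deoptimization,
// HNewInstance and HNewArray in this pass.
bool SingletonStoresObservableOnThrow(const HBasicBlock* block) {
  // Old: block->GetTryCatchInformation() != nullptr  -- true for try *and* catch blocks.
  // New: only a block that is itself inside a try keeps singleton stores alive, because
  // only then can a handler in this method run and observe the heap after the throw.
  return block->IsTryBlock();
}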
KeepStores(heap_values[i].stored_by); heap_values[i].stored_by = Value::PureUnknown(); @@ -1274,8 +1322,8 @@ class LSEVisitor final : private HGraphDelegateVisitor { } void VisitNewArray(HNewArray* new_array) override { - // If we are in a try catch, even singletons are observable. - const bool in_try_catch = new_array->GetBlock()->GetTryCatchInformation() != nullptr; + // If we are in a try, even singletons are observable. + const bool inside_a_try = new_array->GetBlock()->IsTryBlock(); ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array); if (ref_info == nullptr) { // new_array isn't used for array accesses. No need to process it. @@ -1300,7 +1348,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { // Array elements are set to default heap values. heap_values[i].value = Value::Default(); heap_values[i].stored_by = Value::PureUnknown(); - } else if (in_try_catch || IsEscapingObject(info, block, i)) { + } else if (inside_a_try || IsEscapingObject(info, block, i)) { // Since NewArray can throw, we presume all previous stores could be visible. KeepStores(heap_values[i].stored_by); heap_values[i].stored_by = Value::PureUnknown(); @@ -1704,8 +1752,7 @@ void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) { ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()]; DCHECK(heap_values.empty()); size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations(); - if (block->GetPredecessors().empty() || (block->GetTryCatchInformation() != nullptr && - block->GetTryCatchInformation()->IsCatchBlock())) { + if (block->GetPredecessors().empty() || block->IsCatchBlock()) { DCHECK_IMPLIES(block->GetPredecessors().empty(), block->IsEntryBlock()); heap_values.resize(num_heap_locations, {/*value=*/Value::PureUnknown(), /*stored_by=*/Value::PureUnknown()}); @@ -1764,7 +1811,6 @@ static HInstruction* FindOrConstructNonLoopPhi( if (type == DataType::Type::kReference) { // Update reference type information. Pass invalid handles, these are not used for Phis. ReferenceTypePropagation rtp_fixup(block->GetGraph(), - Handle<mirror::ClassLoader>(), Handle<mirror::DexCache>(), /* is_first_run= */ false); rtp_fixup.Visit(phi); @@ -1877,7 +1923,6 @@ void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) { } HInstruction* heap_value = FindSubstitute(record.value.GetInstruction()); AddRemovedLoad(instruction, heap_value); - TryRemovingNullCheck(instruction); } } @@ -2068,9 +2113,15 @@ bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault( HInstruction* replacement = GetDefaultValue(type); for (uint32_t phi_placeholder_index : visited.Indexes()) { DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid()); - phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement); + PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index); + HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation()); + // We use both vector and non vector operations to analyze the information. However, we replace + // only non vector operations in this code path. 
+ if (!hl->IsVecOp()) { + phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement); + phi_placeholders_to_materialize->ClearBit(phi_placeholder_index); + } } - phi_placeholders_to_materialize->Subtract(&visited); return true; } @@ -2125,9 +2176,15 @@ bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput( DCHECK(replacement != nullptr); for (uint32_t phi_placeholder_index : visited.Indexes()) { DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid()); - phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement); + PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index); + HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation()); + // We use both vector and non vector operations to analyze the information. However, we replace + // only vector operations in this code path. + if (hl->IsVecOp()) { + phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement); + phi_placeholders_to_materialize->ClearBit(phi_placeholder_index); + } } - phi_placeholders_to_materialize->Subtract(&visited); return true; } @@ -2352,7 +2409,6 @@ bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_inde } // Update reference type information. Pass invalid handles, these are not used for Phis. ReferenceTypePropagation rtp_fixup(GetGraph(), - Handle<mirror::ClassLoader>(), Handle<mirror::DexCache>(), /* is_first_run= */ false); rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis)); @@ -2639,7 +2695,6 @@ void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unk record.value = local_heap_values[idx]; HInstruction* heap_value = local_heap_values[idx].GetInstruction(); AddRemovedLoad(load_or_store, heap_value); - TryRemovingNullCheck(load_or_store); } } } @@ -2698,7 +2753,6 @@ void LSEVisitor::ProcessLoadsRequiringLoopPhis() { record.value = Replacement(record.value); HInstruction* heap_value = record.value.GetInstruction(); AddRemovedLoad(load, heap_value); - TryRemovingNullCheck(load); } } } @@ -3013,7 +3067,6 @@ class PartialLoadStoreEliminationHelper { return; } ReferenceTypePropagation rtp_fixup(GetGraph(), - Handle<mirror::ClassLoader>(), Handle<mirror::DexCache>(), /* is_first_run= */ false); rtp_fixup.Visit(ArrayRef<HInstruction* const>(new_ref_phis_)); @@ -3333,7 +3386,7 @@ class PartialLoadStoreEliminationHelper { ins->GetBlock()->InsertInstructionBefore(new_fget, ins); if (ins->GetType() == DataType::Type::kReference) { // Reference info is the same - new_fget->SetReferenceTypeInfo(ins->GetReferenceTypeInfo()); + new_fget->SetReferenceTypeInfoIfValid(ins->GetReferenceTypeInfo()); } // In this phase, substitute instructions are used only for the predicated get // default values which are used only if the partial singleton did not escape, diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h index 6ad2eb2c51..42de803ebd 100644 --- a/compiler/optimizing/load_store_elimination.h +++ b/compiler/optimizing/load_store_elimination.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_LOAD_STORE_ELIMINATION_H_ #define ART_COMPILER_OPTIMIZING_LOAD_STORE_ELIMINATION_H_ +#include "base/macros.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class SideEffectsAnalysis; diff --git a/compiler/optimizing/load_store_elimination_test.cc b/compiler/optimizing/load_store_elimination_test.cc index 02dc939878..1ee109980f 100644 --- 
a/compiler/optimizing/load_store_elimination_test.cc +++ b/compiler/optimizing/load_store_elimination_test.cc @@ -36,7 +36,9 @@ #include "optimizing_unit_test.h" #include "scoped_thread_state_change.h" -namespace art { +namespace art HIDDEN { + +static constexpr bool kDebugLseTests = false; #define CHECK_SUBROUTINE_FAILURE() \ do { \ @@ -54,12 +56,16 @@ class LoadStoreEliminationTestBase : public SuperTest, public OptimizingUnitTest void SetUp() override { SuperTest::SetUp(); - gLogVerbosity.compiler = true; + if (kDebugLseTests) { + gLogVerbosity.compiler = true; + } } void TearDown() override { SuperTest::TearDown(); - gLogVerbosity.compiler = false; + if (kDebugLseTests) { + gLogVerbosity.compiler = false; + } } void PerformLSE(bool with_partial = true) { @@ -67,15 +73,40 @@ class LoadStoreEliminationTestBase : public SuperTest, public OptimizingUnitTest LoadStoreElimination lse(graph_, /*stats=*/nullptr); lse.Run(with_partial); std::ostringstream oss; - EXPECT_TRUE(CheckGraphSkipRefTypeInfoChecks(oss)) << oss.str(); + EXPECT_TRUE(CheckGraph(oss)) << oss.str(); } - void PerformLSEWithPartial() { - PerformLSE(true); + void PerformLSEWithPartial(const AdjacencyListGraph& blks) { + // PerformLSE expects this to be empty. + graph_->ClearDominanceInformation(); + if (kDebugLseTests) { + LOG(INFO) << "Pre LSE " << blks; + } + PerformLSE(/*with_partial=*/ true); + if (kDebugLseTests) { + LOG(INFO) << "Post LSE " << blks; + } } - void PerformLSENoPartial() { - PerformLSE(false); + void PerformLSENoPartial(const AdjacencyListGraph& blks) { + // PerformLSE expects this to be empty. + graph_->ClearDominanceInformation(); + if (kDebugLseTests) { + LOG(INFO) << "Pre LSE " << blks; + } + PerformLSE(/*with_partial=*/ false); + if (kDebugLseTests) { + LOG(INFO) << "Post LSE " << blks; + } + } + + void PerformSimplifications(const AdjacencyListGraph& blks) { + InstructionSimplifier simp(graph_, /*codegen=*/nullptr); + simp.Run(); + + if (kDebugLseTests) { + LOG(INFO) << "Post simplification " << blks; + } } // Create instructions shared among tests. 
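The helpers introduced above centralize the boilerplate that the later hunks in this file delete from individual tests; the typical call pattern (taken from those hunks) becomes:

  SetupExit(exit);

  // Clears dominance information and, when kDebugLseTests is set, logs the graph
  // before and after the pass.
  PerformLSEWithPartial(blks);

  // Optionally followed by the simplifier for the SimplifyTest* cases.
  PerformSimplifications(blks);

  EXPECT_INS_REMOVED(write_right);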
@@ -542,6 +573,7 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue2) { AddVecStore(entry_block_, array_, j_); HInstruction* vstore = AddVecStore(entry_block_, array_, i_); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore)); @@ -557,6 +589,7 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue3) { AddVecStore(entry_block_, array_, i_add1_); HInstruction* vstore = AddVecStore(entry_block_, array_, i_); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore)); @@ -601,6 +634,7 @@ TEST_F(LoadStoreEliminationTest, OverlappingLoadStore) { AddArraySet(entry_block_, array_, i_, c1); HInstruction* vload5 = AddVecLoad(entry_block_, array_, i_); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(load1)); @@ -634,6 +668,7 @@ TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithoutSideEffects) { // a[j] = 1; HInstruction* array_set = AddArraySet(return_block_, array_, j_, c1); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(array_set)); @@ -671,6 +706,7 @@ TEST_F(LoadStoreEliminationTest, StoreAfterSIMDLoopWithSideEffects) { // a[j] = 0; HInstruction* a_set = AddArraySet(return_block_, array_, j_, c0); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vload)); @@ -709,6 +745,7 @@ TEST_F(LoadStoreEliminationTest, LoadAfterSIMDLoopWithSideEffects) { // x = a[j]; HInstruction* load = AddArrayGet(return_block_, array_, j_); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vload)); @@ -749,6 +786,7 @@ TEST_F(LoadStoreEliminationTest, MergePredecessorVecStores) { // down: a[i,... i + 3] = [1,...1] HInstruction* vstore4 = AddVecStore(down, array_, i_, vdata); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vstore2)); @@ -839,6 +877,7 @@ TEST_F(LoadStoreEliminationTest, RedundantVStoreVLoadInLoop) { HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload->AsVecLoad()); HInstruction* vstore3 = AddVecStore(loop_, array_a, phi_, vstore1->InputAt(2)); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore1)); @@ -894,7 +933,7 @@ TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithSideEffects2) { // loop: // array2[i] = array[i] // array[0] = 2 - HInstruction* store1 = AddArraySet(entry_block_, array_, c0, c2); + HInstruction* store1 = AddArraySet(pre_header_, array_, c0, c2); HInstruction* load = AddArrayGet(loop_, array_, phi_); HInstruction* store2 = AddArraySet(loop_, array2, phi_, load); @@ -926,6 +965,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueInLoopWithoutWriteSideEffects) HInstruction* vload = AddVecLoad(loop_, array_a, phi_); HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -949,6 +989,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValue) { HInstruction* vload = AddVecLoad(pre_header_, array_a, c0); HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1025,6 +1066,7 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValueInLoopWithoutWriteSideE HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); HInstruction* store = AddArraySet(return_block_, array_, c0, load); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1055,6 +1097,7 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValue) { HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); HInstruction* 
store = AddArraySet(return_block_, array_, c0, load); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1086,6 +1129,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoadInLoopWithoutWriteSide HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad()); HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad()); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload1)); @@ -1116,6 +1160,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoad) { HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad()); HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad()); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload1)); @@ -2024,10 +2069,7 @@ TEST_F(LoadStoreEliminationTest, PartialUnknownMerge) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSENoPartial(); + PerformLSENoPartial(blks); EXPECT_INS_RETAINED(read_bottom); EXPECT_INS_RETAINED(write_c1); @@ -2174,9 +2216,8 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved) { HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); exit->AddInstruction(read_bottom); exit->AddInstruction(return_exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - PerformLSENoPartial(); + + PerformLSENoPartial(blks); EXPECT_INS_RETAINED(read_bottom) << *read_bottom; EXPECT_INS_RETAINED(write_right) << *write_right; @@ -2266,9 +2307,8 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved2) { HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); exit->AddInstruction(read_bottom); exit->AddInstruction(return_exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - PerformLSENoPartial(); + + PerformLSENoPartial(blks); EXPECT_INS_RETAINED(read_bottom); EXPECT_INS_RETAINED(write_right_first); @@ -2499,11 +2539,7 @@ TEST_F(LoadStoreEliminationTest, PartialPhiPropagation) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HPredicatedInstanceFieldGet* pred_get = FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_); @@ -2656,11 +2692,7 @@ TEST_P(OrderDependentTestGroup, PredicatedUse) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(call_left_left); EXPECT_INS_REMOVED(read1); @@ -2814,11 +2846,7 @@ TEST_P(OrderDependentTestGroup, PredicatedEnvUse) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HNewInstance* moved_new_inst1; HInstanceFieldSet* moved_set1; @@ -2954,11 +2982,7 @@ TEST_P(OrderDependentTestGroup, FieldSetOrderEnv) { SetupExit(exit); - // PerformLSE expects this to be empty. 
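A recurring one-line addition in the vectorized tests above is `graph_->SetHasSIMD(true)` immediately before PerformLSE(); presumably the pass (and the graph checker) now rely on this flag when heap locations refer to VecLoad/VecStore operations, matching the IsVecOp() handling added to load_store_elimination.cc earlier in this change. The resulting shape of such a test:

  // ... build the blocks and the vector accesses ...
  HInstruction* vload = AddVecLoad(pre_header_, array_a, c0);
  HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());

  graph_->SetHasSIMD(true);  // mark the graph before running the pass
  PerformLSE();

  ASSERT_FALSE(IsRemoved(vload));
  ASSERT_FALSE(IsRemoved(vstore));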
- graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(write_entry1); EXPECT_INS_REMOVED(write_entry2); @@ -3115,11 +3139,7 @@ TEST_P(OrderDependentTestGroup, MaterializationMovedUse) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(new_inst1); EXPECT_INS_REMOVED(new_inst2); @@ -3205,11 +3225,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HNewInstance* moved_new_inst = nullptr; HInstanceFieldSet* moved_set = nullptr; @@ -3320,11 +3336,7 @@ TEST_F(LoadStoreEliminationTest, MutiPartialLoadStore) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HNewInstance* moved_new_inst = nullptr; HInstanceFieldSet* moved_set = nullptr; @@ -3497,11 +3509,7 @@ TEST_F(LoadStoreEliminationTest, MutiPartialLoadStore2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HNewInstance* moved_new_inst = nullptr; HInstanceFieldSet* moved_set = nullptr; @@ -3639,11 +3647,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HNewInstance* moved_new_inst; HInstanceFieldSet* moved_set; @@ -3746,11 +3750,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc3) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); // Each escaping switch path gets its own materialization block. // Blocks: @@ -3877,11 +3877,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc4) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_early); EXPECT_EQ(return_early->InputAt(0), c0); @@ -4013,11 +4009,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc5) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); // Normal LSE can get rid of these two. EXPECT_INS_REMOVED(store_one); @@ -4504,9 +4496,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved3) { SetupExit(exit); - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - PerformLSENoPartial(); + PerformLSENoPartial(blks); EXPECT_INS_RETAINED(write_left_pre) << *write_left_pre; EXPECT_INS_RETAINED(read_return) << *read_return; @@ -4612,9 +4602,7 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved4) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - PerformLSENoPartial(); + PerformLSENoPartial(blks); EXPECT_INS_RETAINED(read_return); EXPECT_INS_RETAINED(write_right); @@ -4700,9 +4688,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved5) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - PerformLSENoPartial(); + PerformLSENoPartial(blks); EXPECT_INS_RETAINED(read_bottom); EXPECT_INS_RETAINED(write_right); @@ -4785,12 +4771,7 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved6) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - - LOG(INFO) << "Pre LSE " << blks; - PerformLSENoPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSENoPartial(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_REMOVED(write_right); @@ -4829,8 +4810,9 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) { CreateGraph(/*handles=*/&vshs); AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", - {{"entry", "critical_break"}, - {"entry", "partial"}, + {{"entry", "first_block"}, + {"first_block", "critical_break"}, + {"first_block", "partial"}, {"partial", "merge"}, {"critical_break", "merge"}, {"merge", "left"}, @@ -4839,7 +4821,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) { {"right", "breturn"}, {"breturn", "exit"}})); #define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); + GET_BLOCK(first_block); GET_BLOCK(merge); GET_BLOCK(partial); GET_BLOCK(critical_break); @@ -4858,12 +4840,12 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) { HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32)); ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst); HInstruction* if_inst = new (GetAllocator()) HIf(cmp_instructions.cmp_); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_entry); - cmp_instructions.AddSetup(entry); - entry->AddInstruction(cmp_instructions.cmp_); - entry->AddInstruction(if_inst); + first_block->AddInstruction(cls); + first_block->AddInstruction(new_inst); + first_block->AddInstruction(write_entry); + cmp_instructions.AddSetup(first_block); + first_block->AddInstruction(cmp_instructions.cmp_); + first_block->AddInstruction(if_inst); ManuallyBuildEnvFor(cls, {}); cmp_instructions.AddEnvironment(cls->GetEnvironment()); new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); @@ -4897,12 +4879,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); std::vector<HPhi*> merges; HPredicatedInstanceFieldGet* pred_get; @@ -5026,11 +5003,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortBeforeEscape) { SetupExit(exit); - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); std::vector<HPhi*> merges; HInstanceFieldSet* init_set = @@ -5157,11 +5130,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonAfterCohort) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); std::vector<HPhi*> merges; HInstanceFieldSet* init_set = @@ -5290,12 +5259,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortAfterEscape) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); std::vector<HPhi*> merges; std::vector<HInstanceFieldSet*> sets; @@ -5424,12 +5388,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore1) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(write_bottom); EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet()); @@ -5539,11 +5498,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(write_bottom); EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_bottom; @@ -5627,11 +5582,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad1) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_REMOVED(write_right); @@ -5748,11 +5699,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad1) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom1); EXPECT_INS_REMOVED(read_bottom2); @@ -5901,11 +5848,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom1); EXPECT_INS_REMOVED(read_bottom2); @@ -6078,11 +6021,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad3) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(early_exit_left_read); EXPECT_INS_REMOVED(early_exit_right_read); @@ -6212,11 +6151,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad4) { SetupExit(exit); - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_REMOVED(read_right); @@ -6334,11 +6269,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad4) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_REMOVED(read_early_return); @@ -6447,11 +6378,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_REMOVED(write_right); @@ -6585,11 +6512,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad3) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_REMOVED(write_right); @@ -6688,11 +6611,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoadDefaultValue) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_RETAINED(write_left); @@ -6861,11 +6780,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis1) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HPredicatedInstanceFieldGet* pred_get = FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); @@ -7045,11 +6960,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HPredicatedInstanceFieldGet* pred_get = FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); @@ -7196,11 +7107,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis3) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HPredicatedInstanceFieldGet* pred_get = FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); @@ -7344,11 +7251,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HPredicatedInstanceFieldGet* pred_get = FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); @@ -7492,11 +7395,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis5) { SetupExit(exit); - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HPredicatedInstanceFieldGet* pred_get = FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); @@ -7657,11 +7556,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis6) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HPredicatedInstanceFieldGet* pred_get = FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); @@ -7757,17 +7652,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); + PerformLSEWithPartial(blks); // Run the code-simplifier too - LOG(INFO) << "Pre simplification " << blks; - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post LSE " << blks; + PerformSimplifications(blks); EXPECT_INS_REMOVED(write_right); EXPECT_INS_REMOVED(write_start); @@ -7851,17 +7739,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); + PerformLSEWithPartial(blks); // Run the code-simplifier too - LOG(INFO) << "Pre simplification " << blks; - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post LSE " << blks; + PerformSimplifications(blks); EXPECT_INS_REMOVED(write_right); EXPECT_INS_REMOVED(write_start); @@ -7961,17 +7842,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest3) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); + PerformLSEWithPartial(blks); // Run the code-simplifier too - LOG(INFO) << "Pre simplification " << blks; - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post LSE " << blks; + PerformSimplifications(blks); EXPECT_INS_REMOVED(write_case2); EXPECT_INS_REMOVED(write_case3); @@ -8069,17 +7943,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest4) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); + PerformLSEWithPartial(blks); // Run the code-simplifier too - LOG(INFO) << "Pre simplification " << blks; - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post LSE " << blks; + PerformSimplifications(blks); EXPECT_INS_REMOVED(write_case2); EXPECT_INS_REMOVED(write_case3); @@ -8225,11 +8092,7 @@ TEST_F(LoadStoreEliminationTest, PartialIrreducibleLoop) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_TRUE(loop_header->IsLoopHeader()); EXPECT_TRUE(loop_header->GetLoopInformation()->IsIrreducible()); @@ -8382,11 +8245,7 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements1) { SetupExit(exit); - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(cls); EXPECT_INS_REMOVED(new_inst); @@ -8544,11 +8403,7 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(cls); EXPECT_INS_REMOVED(new_inst); @@ -8752,11 +8607,7 @@ TEST_P(UsesOrderDependentTestGroupForThreeItems, RecordPredicatedReplacements3) SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(cls); EXPECT_INS_REMOVED(new_inst); diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index 5879c6fa07..f40b7f4f0c 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -21,7 +21,7 @@ #include "code_generator.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { // Verify that Location is trivially copyable. static_assert(std::is_trivially_copyable<Location>::value, "Location should be trivially copyable"); @@ -57,7 +57,7 @@ LocationSummary::LocationSummary(HInstruction* instruction, Location Location::RegisterOrConstant(HInstruction* instruction) { return instruction->IsConstant() - ? Location::ConstantLocation(instruction->AsConstant()) + ? Location::ConstantLocation(instruction) : Location::RequiresRegister(); } @@ -85,16 +85,23 @@ Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) { Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) { return instruction->IsConstant() - ? Location::ConstantLocation(instruction->AsConstant()) + ? Location::ConstantLocation(instruction) : Location::RegisterLocation(reg); } Location Location::FpuRegisterOrConstant(HInstruction* instruction) { return instruction->IsConstant() - ? Location::ConstantLocation(instruction->AsConstant()) + ? Location::ConstantLocation(instruction) : Location::RequiresFpuRegister(); } +void Location::DCheckInstructionIsConstant(HInstruction* instruction) { + DCHECK(instruction != nullptr); + DCHECK(instruction->IsConstant()); + DCHECK_EQ(reinterpret_cast<uintptr_t>(instruction), + reinterpret_cast<uintptr_t>(instruction->AsConstant())); +} + std::ostream& operator<<(std::ostream& os, const Location& location) { os << location.DebugString(); if (location.IsRegister() || location.IsFpuRegister()) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index acaea71a49..7ee076f442 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -22,9 +22,10 @@ #include "base/bit_field.h" #include "base/bit_utils.h" #include "base/bit_vector.h" +#include "base/macros.h" #include "base/value_object.h" -namespace art { +namespace art HIDDEN { class HConstant; class HInstruction; @@ -102,8 +103,12 @@ class Location : public ValueObject { return (value_ & kLocationConstantMask) == kConstant; } - static Location ConstantLocation(HConstant* constant) { + static Location ConstantLocation(HInstruction* constant) { DCHECK(constant != nullptr); + if (kIsDebugBuild) { + // Call out-of-line helper to avoid circular dependency with `nodes.h`. 
+ DCheckInstructionIsConstant(constant); + } return Location(kConstant | reinterpret_cast<uintptr_t>(constant)); } @@ -425,6 +430,8 @@ class Location : public ValueObject { return PayloadField::Decode(value_); } + static void DCheckInstructionIsConstant(HInstruction* instruction); + using KindField = BitField<Kind, 0, kBitsForKind>; using PayloadField = BitField<uintptr_t, kBitsForKind, kBitsForPayload>; diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc index 76bd8493b2..95e81533da 100644 --- a/compiler/optimizing/loop_analysis.cc +++ b/compiler/optimizing/loop_analysis.cc @@ -20,7 +20,7 @@ #include "code_generator.h" #include "induction_var_range.h" -namespace art { +namespace art HIDDEN { void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, LoopAnalysisInfo* analysis_results, diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h index fbf1516f64..cec00fecf4 100644 --- a/compiler/optimizing/loop_analysis.h +++ b/compiler/optimizing/loop_analysis.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ #define ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class InductionVarRange; diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 2d7c20825c..7a52502562 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -27,7 +27,7 @@ #include "mirror/array-inl.h" #include "mirror/string.h" -namespace art { +namespace art HIDDEN { // Enables vectorization (SIMDization) in the loop optimizer. static constexpr bool kEnableVectorization = true; @@ -507,9 +507,8 @@ bool HLoopOptimization::Run() { graph_->SetHasLoops(false); // no more loops } - // Detach. + // Detach allocator. loop_allocator_ = nullptr; - last_loop_ = top_loop_ = nullptr; return did_loop_opt; } @@ -530,11 +529,7 @@ bool HLoopOptimization::LocalRun() { AddLoop(block->GetLoopInformation()); } } - - // TODO(solanes): How can `top_loop_` be null if `graph_->HasLoops()` is true? - if (top_loop_ == nullptr) { - return false; - } + DCHECK(top_loop_ != nullptr); // Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use // temporary data structures using the phase-local allocator. All new HIR @@ -681,6 +676,50 @@ void HLoopOptimization::CalculateAndSetTryCatchKind(LoopNode* node) { } // +// This optimization applies to loops with plain simple operations +// (I.e. no calls to java code or runtime) with a known small trip_count * instr_count +// value. +// +bool HLoopOptimization::TryToRemoveSuspendCheckFromLoopHeader(LoopAnalysisInfo* analysis_info, + bool generate_code) { + if (!graph_->SuspendChecksAreAllowedToNoOp()) { + return false; + } + + int64_t trip_count = analysis_info->GetTripCount(); + + if (trip_count == LoopAnalysisInfo::kUnknownTripCount) { + return false; + } + + int64_t instruction_count = analysis_info->GetNumberOfInstructions(); + int64_t total_instruction_count = trip_count * instruction_count; + + // The inclusion of the HasInstructionsPreventingScalarOpts() prevents this + // optimization from being applied to loops that have calls. 
+ bool can_optimize = + total_instruction_count <= HLoopOptimization::kMaxTotalInstRemoveSuspendCheck && + !analysis_info->HasInstructionsPreventingScalarOpts(); + + if (!can_optimize) { + return false; + } + + // If we should do the optimization, disable codegen for the SuspendCheck. + if (generate_code) { + HLoopInformation* loop_info = analysis_info->GetLoopInfo(); + HBasicBlock* header = loop_info->GetHeader(); + HSuspendCheck* instruction = header->GetLoopInformation()->GetSuspendCheck(); + // As other optimizations depend on SuspendCheck + // (e.g: CHAGuardVisitor::HoistGuard), disable its codegen instead of + // removing the SuspendCheck instruction. + instruction->SetIsNoOp(true); + } + + return true; +} + +// // Optimization. // @@ -824,7 +863,7 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { } bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { - return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node); + return TryOptimizeInnerLoopFinite(node) || TryLoopScalarOpts(node); } // @@ -928,7 +967,7 @@ bool HLoopOptimization::TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool g return true; } -bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { +bool HLoopOptimization::TryLoopScalarOpts(LoopNode* node) { HLoopInformation* loop_info = node->loop_info; int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_); LoopAnalysisInfo analysis_info(loop_info); @@ -941,10 +980,16 @@ bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { if (!TryFullUnrolling(&analysis_info, /*generate_code*/ false) && !TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) && - !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) { + !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false) && + !TryToRemoveSuspendCheckFromLoopHeader(&analysis_info, /*generate_code*/ false)) { return false; } + // Try the suspend check removal even for non-clonable loops. Also this + // optimization doesn't interfere with other scalar loop optimizations so it can + // be done prior to them. + bool removed_suspend_check = TryToRemoveSuspendCheckFromLoopHeader(&analysis_info); + // Run 'IsLoopClonable' the last as it might be time-consuming. 
if (!LoopClonerHelper::IsLoopClonable(loop_info)) { return false; @@ -952,7 +997,7 @@ bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { return TryFullUnrolling(&analysis_info) || TryPeelingForLoopInvariantExitsElimination(&analysis_info) || - TryUnrollingForBranchPenaltyReduction(&analysis_info); + TryUnrollingForBranchPenaltyReduction(&analysis_info) || removed_suspend_check; } // diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index b17861648f..6dd778ba74 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ #define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "induction_var_range.h" @@ -25,7 +26,7 @@ #include "optimization.h" #include "superblock_cloner.h" -namespace art { +namespace art HIDDEN { class CompilerOptions; class ArchNoOptsLoopHelper; @@ -47,6 +48,11 @@ class HLoopOptimization : public HOptimization { static constexpr const char* kLoopOptimizationPassName = "loop_optimization"; + // The maximum number of total instructions (trip_count * instruction_count), + // where the optimization of removing SuspendChecks from the loop header could + // be performed. + static constexpr int64_t kMaxTotalInstRemoveSuspendCheck = 128; + private: /** * A single loop inside the loop hierarchy representation. @@ -179,8 +185,19 @@ class HLoopOptimization : public HOptimization { // should be actually applied. bool TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code = true); - // Tries to apply scalar loop peeling and unrolling. - bool TryPeelingAndUnrolling(LoopNode* node); + // Tries to remove SuspendCheck for plain loops with a low trip count. The + // SuspendCheck in the codegen makes sure that the thread can be interrupted + // during execution for GC. Not being able to do so might decrease the + // responsiveness of GC when a very long loop or a long recursion is being + // executed. However, for plain loops with a small trip count, the removal of + // SuspendCheck should not affect the GC's responsiveness by a large margin. + // Consequently, since the thread won't be interrupted for plain loops, it is + // assumed that the performance might increase by removing SuspendCheck. + bool TryToRemoveSuspendCheckFromLoopHeader(LoopAnalysisInfo* analysis_info, + bool generate_code = true); + + // Tries to apply scalar loop optimizations. + bool TryLoopScalarOpts(LoopNode* node); // // Vectorization analysis and synthesis. diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc index bda25283f5..7f694fb655 100644 --- a/compiler/optimizing/loop_optimization_test.cc +++ b/compiler/optimizing/loop_optimization_test.cc @@ -14,12 +14,13 @@ * limitations under the License. */ +#include "base/macros.h" #include "code_generator.h" #include "driver/compiler_options.h" #include "loop_optimization.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { /** * Fixture class for the loop optimization tests. These unit tests focus @@ -94,10 +95,7 @@ class LoopOptimizationTest : public OptimizingUnitTest { void PerformAnalysis() { graph_->BuildDominatorTree(); iva_->Run(); - // Do not release the loop hierarchy. 
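A distillation of the new suspend-check decision from TryToRemoveSuspendCheckFromLoopHeader() above, with a worked example of the kMaxTotalInstRemoveSuspendCheck = 128 threshold (the `has_calls` parameter stands in for HasInstructionsPreventingScalarOpts(); names otherwise as in the hunk, and the graph_->SuspendChecksAreAllowedToNoOp() gate is omitted):

bool ShouldElideSuspendCheck(int64_t trip_count, int64_t instruction_count, bool has_calls) {
  if (trip_count == LoopAnalysisInfo::kUnknownTripCount || has_calls) {
    return false;
  }
  return trip_count * instruction_count <= HLoopOptimization::kMaxTotalInstRemoveSuspendCheck;
}

// e.g. 16 iterations * 7 instructions = 112 <= 128  -> the header's HSuspendCheck is marked
//                                                      SetIsNoOp(true) (kept, but emits no code).
//      16 iterations * 9 instructions = 144 >  128  -> the SuspendCheck stays live, so GC can
//                                                      still interrupt the loop.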
- ScopedArenaAllocator loop_allocator(GetArenaStack()); - loop_opt_->loop_allocator_ = &loop_allocator; - loop_opt_->LocalRun(); + loop_opt_->Run(); } /** Constructs string representation of computed loop hierarchy. */ diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index d35ed1c543..3790058879 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -40,7 +40,7 @@ #include "scoped_thread_state_change-inl.h" #include "ssa_builder.h" -namespace art { +namespace art HIDDEN { // Enable floating-point static evaluation during constant folding // only if all floating-point operations and constants evaluate in the @@ -150,30 +150,54 @@ static void RemoveAsUser(HInstruction* instruction) { RemoveEnvironmentUses(instruction); } -void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const { +void HGraph::RemoveDeadBlocksInstructionsAsUsersAndDisconnect(const ArenaBitVector& visited) const { for (size_t i = 0; i < blocks_.size(); ++i) { if (!visited.IsBitSet(i)) { HBasicBlock* block = blocks_[i]; if (block == nullptr) continue; + + // Remove as user. for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { RemoveAsUser(it.Current()); } for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { RemoveAsUser(it.Current()); } + + // Remove non-catch phi uses, and disconnect the block. + block->DisconnectFromSuccessors(&visited); + } + } +} + +// This method assumes `insn` has been removed from all users with the exception of catch +// phis because of missing exceptional edges in the graph. It removes the +// instruction from catch phi uses, together with inputs of other catch phis in +// the catch block at the same index, as these must be dead too. +static void RemoveCatchPhiUsesOfDeadInstruction(HInstruction* insn) { + DCHECK(!insn->HasEnvironmentUses()); + while (insn->HasNonEnvironmentUses()) { + const HUseListNode<HInstruction*>& use = insn->GetUses().front(); + size_t use_index = use.GetIndex(); + HBasicBlock* user_block = use.GetUser()->GetBlock(); + DCHECK(use.GetUser()->IsPhi()); + DCHECK(user_block->IsCatchBlock()); + for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + phi_it.Current()->AsPhi()->RemoveInputAt(use_index); } } } void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) { + DCHECK(reverse_post_order_.empty()) << "We shouldn't have dominance information."; for (size_t i = 0; i < blocks_.size(); ++i) { if (!visited.IsBitSet(i)) { HBasicBlock* block = blocks_[i]; if (block == nullptr) continue; - // We only need to update the successor, which might be live. - for (HBasicBlock* successor : block->GetSuccessors()) { - successor->RemovePredecessor(block); - } + + // Remove all remaining uses (which should be only catch phi uses), and the instructions. + block->RemoveCatchPhiUsesAndInstruction(/* building_dominator_tree = */ true); + // Remove the block from the list of blocks, so that further analyses // never see it. blocks_[i] = nullptr; @@ -200,7 +224,8 @@ GraphAnalysisResult HGraph::BuildDominatorTree() { // (2) Remove instructions and phis from blocks not visited during // the initial DFS as users from other instructions, so that // users can be safely removed before uses later. - RemoveInstructionsAsUsersFromDeadBlocks(visited); + // Also disconnect the block from its successors, updating the successor's phis if needed. 
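The catch-phi bookkeeping described in RemoveCatchPhiUsesOfDeadInstruction() above relies on every phi of a catch block having one input per throwing site. A toy model (plain C++, not ART data structures) of why the same input index is dropped from all phis of the block:

#include <cstddef>
#include <vector>

struct ToyPhi {
  std::vector<int> inputs;  // one entry per throwing site that can reach the catch block
};

// When the throwing site at `dead_index` turns out to be dead, its slot is meaningless in
// every phi of the catch block, so that index is removed across all of them -- mirroring the
// loop over user_block->GetPhis() with RemoveInputAt(use_index) in the hunk above.
void RemoveDeadThrowSite(std::vector<ToyPhi>& catch_phis, size_t dead_index) {
  for (ToyPhi& phi : catch_phis) {
    phi.inputs.erase(phi.inputs.begin() + static_cast<std::ptrdiff_t>(dead_index));
  }
}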
+ RemoveDeadBlocksInstructionsAsUsersAndDisconnect(visited); // (3) Remove blocks not visited during the initial DFS. // Step (5) requires dead blocks to be removed from the @@ -237,6 +262,7 @@ void HGraph::ClearDominanceInformation() { } void HGraph::ClearLoopInformation() { + SetHasLoops(false); SetHasIrreducibleLoops(false); for (HBasicBlock* block : GetActiveBlocks()) { block->SetLoopInformation(nullptr); @@ -544,6 +570,15 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { } } +HBasicBlock* HGraph::SplitEdgeAndUpdateRPO(HBasicBlock* block, HBasicBlock* successor) { + HBasicBlock* new_block = SplitEdge(block, successor); + // In the RPO we have {... , block, ... , successor}. We want to insert `new_block` right after + // `block` to have a consistent RPO without recomputing the whole graph's RPO. + reverse_post_order_.insert( + reverse_post_order_.begin() + IndexOfElement(reverse_post_order_, block) + 1, new_block); + return new_block; +} + // Reorder phi inputs to match reordering of the block's predecessors. static void FixPhisAfterPredecessorsReodering(HBasicBlock* block, size_t first, size_t second) { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { @@ -653,7 +688,7 @@ void HGraph::TransformLoopToSinglePreheaderFormat(HBasicBlock* header) { 0, header_phi->GetType()); if (header_phi->GetType() == DataType::Type::kReference) { - preheader_phi->SetReferenceTypeInfo(header_phi->GetReferenceTypeInfo()); + preheader_phi->SetReferenceTypeInfoIfValid(header_phi->GetReferenceTypeInfo()); } preheader->AddPhi(preheader_phi); @@ -708,6 +743,8 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { void HGraph::ComputeTryBlockInformation() { // Iterate in reverse post order to propagate try membership information from // predecessors to their successors. + bool graph_has_try_catch = false; + for (HBasicBlock* block : GetReversePostOrder()) { if (block->IsEntryBlock() || block->IsCatchBlock()) { // Catch blocks after simplification have only exceptional predecessors @@ -722,6 +759,7 @@ void HGraph::ComputeTryBlockInformation() { DCHECK_IMPLIES(block->IsLoopHeader(), !block->GetLoopInformation()->IsBackEdge(*first_predecessor)); const HTryBoundary* try_entry = first_predecessor->ComputeTryEntryOfSuccessors(); + graph_has_try_catch |= try_entry != nullptr; if (try_entry != nullptr && (block->GetTryCatchInformation() == nullptr || try_entry != &block->GetTryCatchInformation()->GetTryEntry())) { @@ -730,6 +768,8 @@ void HGraph::ComputeTryBlockInformation() { block->SetTryCatchInformation(new (allocator_) TryCatchInformation(*try_entry)); } } + + SetHasTryCatch(graph_has_try_catch); } void HGraph::SimplifyCFG() { @@ -1459,6 +1499,10 @@ bool HInstructionList::FoundBefore(const HInstruction* instruction1, UNREACHABLE(); } +bool HInstruction::Dominates(HInstruction* other_instruction) const { + return other_instruction == this || StrictlyDominates(other_instruction); +} + bool HInstruction::StrictlyDominates(HInstruction* other_instruction) const { if (other_instruction == this) { // An instruction does not strictly dominate itself. 
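// [Illustrative usage sketch, not part of this change.] SplitEdgeAndUpdateRPO lets a
// pass that runs after dominance/RPO computation place a new block on an edge without
// rebuilding the whole reverse post order. As with SplitEdge, the caller still has to
// terminate the new block with a control-flow instruction. `graph`, `pred` and `succ`
// are hypothetical locals of such a pass.
HBasicBlock* new_block = graph->SplitEdgeAndUpdateRPO(pred, succ);
new_block->AddInstruction(new (graph->GetAllocator()) HGoto(succ->GetDexPc()));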
@@ -1518,14 +1562,19 @@ void HInstruction::ReplaceWith(HInstruction* other) { DCHECK(env_uses_.empty()); } -void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) { +void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, + HInstruction* replacement, + bool strictly_dominated) { const HUseList<HInstruction*>& uses = GetUses(); for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { HInstruction* user = it->GetUser(); size_t index = it->GetIndex(); // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). ++it; - if (dominator->StrictlyDominates(user)) { + const bool dominated = + strictly_dominated ? dominator->StrictlyDominates(user) : dominator->Dominates(user); + + if (dominated) { user->ReplaceInput(replacement, index); } else if (user->IsPhi() && !user->AsPhi()->IsCatchPhi()) { // If the input flows from a block dominated by `dominator`, we can replace it. @@ -2108,8 +2157,9 @@ void HInstruction::MoveBeforeFirstUserAndOutOfLoops() { MoveBefore(insert_pos); } -HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { - DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; +HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor, bool require_graph_not_in_ssa_form) { + DCHECK_IMPLIES(require_graph_not_in_ssa_form, !graph_->IsInSsaForm()) + << "Support for SSA form not implemented."; DCHECK_EQ(cursor->GetBlock(), this); HBasicBlock* new_block = @@ -2376,24 +2426,6 @@ void HInstructionList::Add(const HInstructionList& instruction_list) { } } -// Should be called on instructions in a dead block in post order. This method -// assumes `insn` has been removed from all users with the exception of catch -// phis because of missing exceptional edges in the graph. It removes the -// instruction from catch phi uses, together with inputs of other catch phis in -// the catch block at the same index, as these must be dead too. -static void RemoveUsesOfDeadInstruction(HInstruction* insn) { - DCHECK(!insn->HasEnvironmentUses()); - while (insn->HasNonEnvironmentUses()) { - const HUseListNode<HInstruction*>& use = insn->GetUses().front(); - size_t use_index = use.GetIndex(); - HBasicBlock* user_block = use.GetUser()->GetBlock(); - DCHECK(use.GetUser()->IsPhi() && user_block->IsCatchBlock()); - for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - phi_it.Current()->AsPhi()->RemoveInputAt(use_index); - } - } -} - void HBasicBlock::DisconnectAndDelete() { // Dominators must be removed after all the blocks they dominate. This way // a loop header is removed last, a requirement for correct loop information @@ -2418,52 +2450,14 @@ void HBasicBlock::DisconnectAndDelete() { } // (2) Disconnect the block from its successors and update their phis. - for (HBasicBlock* successor : successors_) { - // Delete this block from the list of predecessors. - size_t this_index = successor->GetPredecessorIndexOf(this); - successor->predecessors_.erase(successor->predecessors_.begin() + this_index); - - // Check that `successor` has other predecessors, otherwise `this` is the - // dominator of `successor` which violates the order DCHECKed at the top. - DCHECK(!successor->predecessors_.empty()); - - // Remove this block's entries in the successor's phis. Skip exceptional - // successors because catch phi inputs do not correspond to predecessor - // blocks but throwing instructions. The inputs of the catch phis will be - // updated in step (3). 
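// [Illustrative usage sketch, not part of this change.] The new `strictly_dominated`
// parameter lets a caller widen the replacement to uses located at the dominator
// itself (HInstruction::Dominates above) instead of only strictly dominated ones.
// `value`, `guard` and `checked_value` are hypothetical locals of an optimization.
value->ReplaceUsesDominatedBy(guard, checked_value);  // Previous behaviour: strict only.
value->ReplaceUsesDominatedBy(guard, checked_value,
                              /* strictly_dominated= */ false);  // Also rewrite uses at `guard`.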
- if (!successor->IsCatchBlock()) { - if (successor->predecessors_.size() == 1u) { - // The successor has just one predecessor left. Replace phis with the only - // remaining input. - for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - HPhi* phi = phi_it.Current()->AsPhi(); - phi->ReplaceWith(phi->InputAt(1 - this_index)); - successor->RemovePhi(phi); - } - } else { - for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - phi_it.Current()->AsPhi()->RemoveInputAt(this_index); - } - } - } - } - successors_.clear(); + DisconnectFromSuccessors(); // (3) Remove instructions and phis. Instructions should have no remaining uses // except in catch phis. If an instruction is used by a catch phi at `index`, // remove `index`-th input of all phis in the catch block since they are // guaranteed dead. Note that we may miss dead inputs this way but the // graph will always remain consistent. - for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* insn = it.Current(); - RemoveUsesOfDeadInstruction(insn); - RemoveInstruction(insn); - } - for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) { - HPhi* insn = it.Current()->AsPhi(); - RemoveUsesOfDeadInstruction(insn); - RemovePhi(insn); - } + RemoveCatchPhiUsesAndInstruction(/* building_dominator_tree = */ false); // (4) Disconnect the block from its predecessors and update their // control-flow instructions. @@ -2537,6 +2531,70 @@ void HBasicBlock::DisconnectAndDelete() { SetGraph(nullptr); } +void HBasicBlock::DisconnectFromSuccessors(const ArenaBitVector* visited) { + for (HBasicBlock* successor : successors_) { + // Delete this block from the list of predecessors. + size_t this_index = successor->GetPredecessorIndexOf(this); + successor->predecessors_.erase(successor->predecessors_.begin() + this_index); + + if (visited != nullptr && !visited->IsBitSet(successor->GetBlockId())) { + // `successor` itself is dead. Therefore, there is no need to update its phis. + continue; + } + + DCHECK(!successor->predecessors_.empty()); + + // Remove this block's entries in the successor's phis. Skips exceptional + // successors because catch phi inputs do not correspond to predecessor + // blocks but throwing instructions. They are removed in `RemoveCatchPhiUses`. + if (!successor->IsCatchBlock()) { + if (successor->predecessors_.size() == 1u) { + // The successor has just one predecessor left. Replace phis with the only + // remaining input. + for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + HPhi* phi = phi_it.Current()->AsPhi(); + phi->ReplaceWith(phi->InputAt(1 - this_index)); + successor->RemovePhi(phi); + } + } else { + for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + phi_it.Current()->AsPhi()->RemoveInputAt(this_index); + } + } + } + } + successors_.clear(); +} + +void HBasicBlock::RemoveCatchPhiUsesAndInstruction(bool building_dominator_tree) { + for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* insn = it.Current(); + RemoveCatchPhiUsesOfDeadInstruction(insn); + + // If we are building the dominator tree, we removed all input records previously. + // `RemoveInstruction` will try to remove them again but that's not something we support and we + // will crash. We check here since we won't be checking that in RemoveInstruction. 
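// [Summary sketch of the call sites in this change; `block` and `visited` stand for
// the corresponding locals.] The two new helpers are paired differently depending on
// whether dominance information exists yet.
//
// While building the dominator tree (dead blocks still present, no dominance info),
// phis of unreachable successors are skipped and the input records were already
// removed, so the safety checks are turned off:
block->DisconnectFromSuccessors(&visited);
block->RemoveCatchPhiUsesAndInstruction(/* building_dominator_tree= */ true);
//
// In HBasicBlock::DisconnectAndDelete (SSA form, dominance info valid), every
// remaining successor is live and the usual safety checks stay enabled:
block->DisconnectFromSuccessors(/* visited= */ nullptr);
block->RemoveCatchPhiUsesAndInstruction(/* building_dominator_tree= */ false);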
+ if (building_dominator_tree) { + DCHECK(insn->GetUses().empty()); + DCHECK(insn->GetEnvUses().empty()); + } + RemoveInstruction(insn, /* ensure_safety= */ !building_dominator_tree); + } + for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) { + HPhi* insn = it.Current()->AsPhi(); + RemoveCatchPhiUsesOfDeadInstruction(insn); + + // If we are building the dominator tree, we removed all input records previously. + // `RemovePhi` will try to remove them again but that's not something we support and we + // will crash. We check here since we won't be checking that in RemovePhi. + if (building_dominator_tree) { + DCHECK(insn->GetUses().empty()); + DCHECK(insn->GetEnvUses().empty()); + } + RemovePhi(insn, /* ensure_safety= */ !building_dominator_tree); + } +} + void HBasicBlock::MergeInstructionsWith(HBasicBlock* other) { DCHECK(EndsWithControlFlowInstruction()); RemoveInstruction(GetLastInstruction()); @@ -2660,7 +2718,8 @@ void HGraph::DeleteDeadEmptyBlock(HBasicBlock* block) { void HGraph::UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block, HBasicBlock* reference, - bool replace_if_back_edge) { + bool replace_if_back_edge, + bool has_more_specific_try_catch_info) { if (block->IsLoopHeader()) { // Clear the information of which blocks are contained in that loop. Since the // information is stored as a bit vector based on block ids, we have to update @@ -2687,11 +2746,16 @@ void HGraph::UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block, } } - // Copy TryCatchInformation if `reference` is a try block, not if it is a catch block. - TryCatchInformation* try_catch_info = reference->IsTryBlock() - ? reference->GetTryCatchInformation() - : nullptr; - block->SetTryCatchInformation(try_catch_info); + DCHECK_IMPLIES(has_more_specific_try_catch_info, !reference->IsTryBlock()) + << "We don't allow to inline try catches inside of other try blocks."; + + // Update the TryCatchInformation, if we are not inlining a try catch. + if (!has_more_specific_try_catch_info) { + // Copy TryCatchInformation if `reference` is a try block, not if it is a catch block. + TryCatchInformation* try_catch_info = + reference->IsTryBlock() ? reference->GetTryCatchInformation() : nullptr; + block->SetTryCatchInformation(try_catch_info); + } } HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { @@ -2730,9 +2794,15 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { if (HasTryCatch()) { outer_graph->SetHasTryCatch(true); } + if (HasMonitorOperations()) { + outer_graph->SetHasMonitorOperations(true); + } if (HasSIMD()) { outer_graph->SetHasSIMD(true); } + if (HasAlwaysThrowingInvokes()) { + outer_graph->SetHasAlwaysThrowingInvokes(true); + } HInstruction* return_value = nullptr; if (GetBlocks().size() == 3) { @@ -2771,6 +2841,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { HBasicBlock* first = entry_block_->GetSuccessors()[0]; DCHECK(!first->IsInLoop()); + DCHECK(first->GetTryCatchInformation() == nullptr); at->MergeWithInlined(first); exit_block_->ReplaceWith(to); @@ -2801,12 +2872,14 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // and (4) to the blocks that apply. 
for (HBasicBlock* current : GetReversePostOrder()) { if (current != exit_block_ && current != entry_block_ && current != first) { - DCHECK(current->GetTryCatchInformation() == nullptr); DCHECK(current->GetGraph() == this); current->SetGraph(outer_graph); outer_graph->AddBlock(current); outer_graph->reverse_post_order_[++index_of_at] = current; - UpdateLoopAndTryInformationOfNewBlock(current, at, /* replace_if_back_edge= */ false); + UpdateLoopAndTryInformationOfNewBlock(current, + at, + /* replace_if_back_edge= */ false, + current->GetTryCatchInformation() != nullptr); } } @@ -2820,25 +2893,62 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Update all predecessors of the exit block (now the `to` block) // to not `HReturn` but `HGoto` instead. Special case throwing blocks - // to now get the outer graph exit block as successor. Note that the inliner - // currently doesn't support inlining methods with try/catch. + // to now get the outer graph exit block as successor. HPhi* return_value_phi = nullptr; bool rerun_dominance = false; bool rerun_loop_analysis = false; for (size_t pred = 0; pred < to->GetPredecessors().size(); ++pred) { HBasicBlock* predecessor = to->GetPredecessors()[pred]; HInstruction* last = predecessor->GetLastInstruction(); + + // At this point we might either have: + // A) Return/ReturnVoid/Throw as the last instruction, or + // B) `Return/ReturnVoid/Throw->TryBoundary` as the last instruction chain + + const bool saw_try_boundary = last->IsTryBoundary(); + if (saw_try_boundary) { + DCHECK(predecessor->IsSingleTryBoundary()); + DCHECK(!last->AsTryBoundary()->IsEntry()); + predecessor = predecessor->GetSinglePredecessor(); + last = predecessor->GetLastInstruction(); + } + if (last->IsThrow()) { - DCHECK(!at->IsTryBlock()); - predecessor->ReplaceSuccessor(to, outer_graph->GetExitBlock()); + if (at->IsTryBlock()) { + DCHECK(!saw_try_boundary) << "We don't support inlining of try blocks into try blocks."; + // Create a TryBoundary of kind:exit and point it to the Exit block. + HBasicBlock* new_block = outer_graph->SplitEdge(predecessor, to); + new_block->AddInstruction( + new (allocator) HTryBoundary(HTryBoundary::BoundaryKind::kExit, last->GetDexPc())); + new_block->ReplaceSuccessor(to, outer_graph->GetExitBlock()); + + // Copy information from the predecessor. + new_block->SetLoopInformation(predecessor->GetLoopInformation()); + TryCatchInformation* try_catch_info = predecessor->GetTryCatchInformation(); + new_block->SetTryCatchInformation(try_catch_info); + for (HBasicBlock* xhandler : + try_catch_info->GetTryEntry().GetBlock()->GetExceptionalSuccessors()) { + new_block->AddSuccessor(xhandler); + } + DCHECK(try_catch_info->GetTryEntry().HasSameExceptionHandlersAs( + *new_block->GetLastInstruction()->AsTryBoundary())); + } else { + // We either have `Throw->TryBoundary` or `Throw`. We want to point the whole chain to the + // exit, so we recompute `predecessor` + predecessor = to->GetPredecessors()[pred]; + predecessor->ReplaceSuccessor(to, outer_graph->GetExitBlock()); + } + --pred; // We need to re-run dominance information, as the exit block now has - // a new dominator. + // a new predecessor and potential new dominator. + // TODO(solanes): See if it's worth it to hand-modify the domination chain instead of + // rerunning the dominance for the whole graph. rerun_dominance = true; if (predecessor->GetLoopInformation() != nullptr) { - // The exit block and blocks post dominated by the exit block do not belong - // to any loop. 
Because we do not compute the post dominators, we need to re-run - // loop analysis to get the loop information correct. + // The loop information might have changed e.g. `predecessor` might not be in a loop + // anymore. We only do this if `predecessor` has loop information as it is impossible for + // predecessor to end up in a loop if it wasn't in one before. rerun_loop_analysis = true; } } else { @@ -2863,6 +2973,19 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { } predecessor->AddInstruction(new (allocator) HGoto(last->GetDexPc())); predecessor->RemoveInstruction(last); + + if (saw_try_boundary) { + predecessor = to->GetPredecessors()[pred]; + DCHECK(predecessor->EndsWithTryBoundary()); + DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u); + if (predecessor->GetSuccessors()[0]->GetPredecessors().size() > 1) { + outer_graph->SplitCriticalEdge(predecessor, to); + rerun_dominance = true; + if (predecessor->GetLoopInformation() != nullptr) { + rerun_loop_analysis = true; + } + } + } } } if (rerun_loop_analysis) { @@ -3047,6 +3170,7 @@ HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header, HSuspendCheck* suspend_check = new (allocator_) HSuspendCheck(header->GetDexPc()); new_header->AddInstruction(suspend_check); new_body->AddInstruction(new (allocator_) HGoto()); + DCHECK(loop->GetSuspendCheck() != nullptr); suspend_check->CopyEnvironmentFromWithLoopPhiAdjustment( loop->GetSuspendCheck()->GetEnvironment(), header); @@ -3091,6 +3215,12 @@ void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) { SetPackedFlag<kFlagReferenceTypeIsExact>(rti.IsExact()); } +void HInstruction::SetReferenceTypeInfoIfValid(ReferenceTypeInfo rti) { + if (rti.IsValid()) { + SetReferenceTypeInfo(rti); + } +} + bool HBoundType::InstructionDataEquals(const HInstruction* other) const { const HBoundType* other_bt = other->AsBoundType(); ScopedObjectAccess soa(Thread::Current()); @@ -3441,8 +3571,8 @@ static inline IntrinsicExceptions GetExceptionsIntrinsic(Intrinsics i) { return kCanThrow; } -void HInvoke::SetResolvedMethod(ArtMethod* method) { - if (method != nullptr && method->IsIntrinsic()) { +void HInvoke::SetResolvedMethod(ArtMethod* method, bool enable_intrinsic_opt) { + if (method != nullptr && method->IsIntrinsic() && enable_intrinsic_opt) { Intrinsics intrinsic = static_cast<Intrinsics>(method->GetIntrinsic()); SetIntrinsic(intrinsic, NeedsEnvironmentIntrinsic(intrinsic), diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 7a0059f616..28112d176a 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -29,6 +29,7 @@ #include "base/array_ref.h" #include "base/intrusive_forward_list.h" #include "base/iteration_range.h" +#include "base/macros.h" #include "base/mutex.h" #include "base/quasi_atomic.h" #include "base/stl_util.h" @@ -51,7 +52,7 @@ #include "mirror/method_type.h" #include "offsets.h" -namespace art { +namespace art HIDDEN { class ArenaStack; class CodeGenerator; @@ -406,6 +407,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { has_loops_(false), has_irreducible_loops_(false), has_direct_critical_native_call_(false), + has_always_throwing_invokes_(false), dead_reference_safe_(dead_reference_safe), debuggable_(debuggable), current_instruction_id_(start_instruction_id), @@ -485,9 +487,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // Update the loop and try membership of `block`, which was spawned from `reference`. 
// In case `reference` is a back edge, `replace_if_back_edge` notifies whether `block` // should be the new back edge. + // `has_more_specific_try_catch_info` will be set to true when inlining a try catch. void UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block, HBasicBlock* reference, - bool replace_if_back_edge); + bool replace_if_back_edge, + bool has_more_specific_try_catch_info = false); // Need to add a couple of blocks to test if the loop body is entered and // put deoptimization instructions, etc. @@ -510,6 +514,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { HBasicBlock* SplitEdge(HBasicBlock* block, HBasicBlock* successor); void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor); + + // Splits the edge between `block` and `successor` and then updates the graph's RPO to keep + // consistency without recomputing the whole graph. + HBasicBlock* SplitEdgeAndUpdateRPO(HBasicBlock* block, HBasicBlock* successor); + void OrderLoopHeaderPredecessors(HBasicBlock* header); // Transform a loop into a format with a single preheader. @@ -678,6 +687,13 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return cha_single_implementation_list_; } + // In case of OSR we intend to use SuspendChecks as an entry point to the + // function; for debuggable graphs we might deoptimize to interpreter from + // SuspendChecks. In these cases we should always generate code for them. + bool SuspendChecksAreAllowedToNoOp() const { + return !IsDebuggable() && !IsCompilingOsr(); + } + void AddCHASingleImplementationDependency(ArtMethod* method) { cha_single_implementation_list_.insert(method); } @@ -704,6 +720,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasDirectCriticalNativeCall() const { return has_direct_critical_native_call_; } void SetHasDirectCriticalNativeCall(bool value) { has_direct_critical_native_call_ = value; } + bool HasAlwaysThrowingInvokes() const { return has_always_throwing_invokes_; } + void SetHasAlwaysThrowingInvokes(bool value) { has_always_throwing_invokes_ = value; } + ArtMethod* GetArtMethod() const { return art_method_; } void SetArtMethod(ArtMethod* method) { art_method_ = method; } @@ -719,12 +738,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return ReferenceTypeInfo::Create(handle_cache_.GetObjectClassHandle(), /* is_exact= */ false); } - uint32_t GetNumberOfCHAGuards() { return number_of_cha_guards_; } + uint32_t GetNumberOfCHAGuards() const { return number_of_cha_guards_; } void SetNumberOfCHAGuards(uint32_t num) { number_of_cha_guards_ = num; } void IncrementNumberOfCHAGuards() { number_of_cha_guards_++; } private: - void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; + void RemoveDeadBlocksInstructionsAsUsersAndDisconnect(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited); template <class InstructionType, typename ValueType> @@ -792,14 +811,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { size_t temporaries_vreg_slots_; // Flag whether there are bounds checks in the graph. We can skip - // BCE if it's false. It's only best effort to keep it up to date in - // the presence of code elimination so there might be false positives. + // BCE if it's false. bool has_bounds_checks_; // Flag whether there are try/catch blocks in the graph. We will skip - // try/catch-related passes if it's false. It's only best effort to keep - // it up to date in the presence of code elimination so there might be - // false positives. 
+ // try/catch-related passes if it's false. bool has_try_catch_; // Flag whether there are any HMonitorOperation in the graph. If yes this will mandate @@ -812,20 +828,19 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool has_simd_; // Flag whether there are any loops in the graph. We can skip loop - // optimization if it's false. It's only best effort to keep it up - // to date in the presence of code elimination so there might be false - // positives. + // optimization if it's false. bool has_loops_; - // Flag whether there are any irreducible loops in the graph. It's only - // best effort to keep it up to date in the presence of code elimination - // so there might be false positives. + // Flag whether there are any irreducible loops in the graph. bool has_irreducible_loops_; // Flag whether there are any direct calls to native code registered // for @CriticalNative methods. bool has_direct_critical_native_call_; + // Flag whether the graph contains invokes that always throw. + bool has_always_throwing_invokes_; + // Is the code known to be robust against eliminating dead references // and the effects of early finalization? If false, dead reference variables // are kept if they might be visible to the garbage collector. @@ -1291,7 +1306,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { // graph, create a Goto at the end of the former block and will create an edge // between the blocks. It will not, however, update the reverse post order or // loop and try/catch information. - HBasicBlock* SplitBefore(HInstruction* cursor); + HBasicBlock* SplitBefore(HInstruction* cursor, bool require_graph_not_in_ssa_form = true); // Split the block into two blocks just before `cursor`. Returns the newly // created block. Note that this method just updates raw block information, @@ -1332,6 +1347,20 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { // are safely updated. void DisconnectAndDelete(); + // Disconnects `this` from all its successors and updates their phis, if the successors have them. + // If `visited` is provided, it will use the information to know if a successor is reachable and + // skip updating those phis. + void DisconnectFromSuccessors(const ArenaBitVector* visited = nullptr); + + // Removes the catch phi uses of the instructions in `this`, and then remove the instruction + // itself. If `building_dominator_tree` is true, it will not remove the instruction as user, since + // we do it in a previous step. This is a special case for building up the dominator tree: we want + // to eliminate uses before inputs but we don't have domination information, so we remove all + // connections from input/uses first before removing any instruction. + // This method assumes the instructions have been removed from all users with the exception of + // catch phis because of missing exceptional edges in the graph. + void RemoveCatchPhiUsesAndInstruction(bool building_dominator_tree); + void AddInstruction(HInstruction* instruction); // Insert `instruction` before/after an existing instruction `cursor`. 
void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); @@ -1540,10 +1569,10 @@ class HLoopInformationOutwardIterator : public ValueObject { M(Min, BinaryOperation) \ M(MonitorOperation, Instruction) \ M(Mul, BinaryOperation) \ - M(NativeDebugInfo, Instruction) \ M(Neg, UnaryOperation) \ M(NewArray, Instruction) \ M(NewInstance, Instruction) \ + M(Nop, Instruction) \ M(Not, UnaryOperation) \ M(NotEqual, Condition) \ M(NullConstant, Instruction) \ @@ -2348,7 +2377,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { return GetType() == DataType::Type::kReference; } + // Sets the ReferenceTypeInfo. The RTI must be valid. void SetReferenceTypeInfo(ReferenceTypeInfo rti); + // Same as above, but we only set it if it's valid. Otherwise, we don't change the current RTI. + void SetReferenceTypeInfoIfValid(ReferenceTypeInfo rti); ReferenceTypeInfo GetReferenceTypeInfo() const { DCHECK_EQ(GetType(), DataType::Type::kReference); @@ -2408,7 +2440,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { !CanThrow() && !IsSuspendCheck() && !IsControlFlow() && - !IsNativeDebugInfo() && + !IsNop() && !IsParameterValue() && // If we added an explicit barrier then we should keep it. !IsMemoryBarrier() && @@ -2419,9 +2451,12 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { return IsRemovable() && !HasUses(); } - // Does this instruction strictly dominate `other_instruction`? - // Returns false if this instruction and `other_instruction` are the same. - // Aborts if this instruction and `other_instruction` are both phis. + // Does this instruction dominate `other_instruction`? + // Aborts if this instruction and `other_instruction` are different phis. + bool Dominates(HInstruction* other_instruction) const; + + // Same but with `strictly dominates` i.e. returns false if this instruction and + // `other_instruction` are the same. bool StrictlyDominates(HInstruction* other_instruction) const; int GetId() const { return id_; } @@ -2486,7 +2521,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { void SetLocations(LocationSummary* locations) { locations_ = locations; } void ReplaceWith(HInstruction* instruction); - void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); + void ReplaceUsesDominatedBy(HInstruction* dominator, + HInstruction* replacement, + bool strictly_dominated = true); void ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); void ReplaceInput(HInstruction* replacement, size_t index); @@ -3730,7 +3767,7 @@ class HClassTableGet final : public HExpression<1> { static constexpr size_t kNumberOfClassTableGetPackedBits = kFieldTableKind + kFieldTableKindSize; static_assert(kNumberOfClassTableGetPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using TableKindField = BitField<TableKind, kFieldTableKind, kFieldTableKind>; + using TableKindField = BitField<TableKind, kFieldTableKind, kFieldTableKindSize>; // The index of the ArtMethod in the table. 
const size_t index_; @@ -4700,7 +4737,7 @@ class HInvoke : public HVariableInputSizeInstruction { void SetAlwaysThrows(bool always_throws) { SetPackedFlag<kFlagAlwaysThrows>(always_throws); } - bool AlwaysThrows() const override { return GetPackedFlag<kFlagAlwaysThrows>(); } + bool AlwaysThrows() const override final { return GetPackedFlag<kFlagAlwaysThrows>(); } bool CanBeMoved() const override { return IsIntrinsic() && !DoesAnyWrite(); } @@ -4719,7 +4756,7 @@ class HInvoke : public HVariableInputSizeInstruction { bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; } ArtMethod* GetResolvedMethod() const { return resolved_method_; } - void SetResolvedMethod(ArtMethod* method); + void SetResolvedMethod(ArtMethod* method, bool enable_intrinsic_opt); MethodReference GetMethodReference() const { return method_reference_; } @@ -4748,7 +4785,8 @@ class HInvoke : public HVariableInputSizeInstruction { MethodReference method_reference, ArtMethod* resolved_method, MethodReference resolved_method_reference, - InvokeType invoke_type) + InvokeType invoke_type, + bool enable_intrinsic_opt) : HVariableInputSizeInstruction( kind, return_type, @@ -4764,7 +4802,7 @@ class HInvoke : public HVariableInputSizeInstruction { intrinsic_optimizations_(0) { SetPackedField<InvokeTypeField>(invoke_type); SetPackedFlag<kFlagCanThrow>(true); - SetResolvedMethod(resolved_method); + SetResolvedMethod(resolved_method, enable_intrinsic_opt); } DEFAULT_COPY_CONSTRUCTOR(Invoke); @@ -4797,7 +4835,8 @@ class HInvokeUnresolved final : public HInvoke { method_reference, nullptr, MethodReference(nullptr, 0u), - invoke_type) { + invoke_type, + /* enable_intrinsic_opt= */ false) { } bool IsClonable() const override { return true; } @@ -4820,7 +4859,8 @@ class HInvokePolymorphic final : public HInvoke { // to pass intrinsic information to the HInvokePolymorphic node. 
ArtMethod* resolved_method, MethodReference resolved_method_reference, - dex::ProtoIndex proto_idx) + dex::ProtoIndex proto_idx, + bool enable_intrinsic_opt) : HInvoke(kInvokePolymorphic, allocator, number_of_arguments, @@ -4830,7 +4870,8 @@ class HInvokePolymorphic final : public HInvoke { method_reference, resolved_method, resolved_method_reference, - kPolymorphic), + kPolymorphic, + enable_intrinsic_opt), proto_idx_(proto_idx) { } @@ -4852,7 +4893,8 @@ class HInvokeCustom final : public HInvoke { uint32_t call_site_index, DataType::Type return_type, uint32_t dex_pc, - MethodReference method_reference) + MethodReference method_reference, + bool enable_intrinsic_opt) : HInvoke(kInvokeCustom, allocator, number_of_arguments, @@ -4862,7 +4904,8 @@ class HInvokeCustom final : public HInvoke { method_reference, /* resolved_method= */ nullptr, MethodReference(nullptr, 0u), - kStatic), + kStatic, + enable_intrinsic_opt), call_site_index_(call_site_index) { } @@ -4909,7 +4952,8 @@ class HInvokeStaticOrDirect final : public HInvoke { DispatchInfo dispatch_info, InvokeType invoke_type, MethodReference resolved_method_reference, - ClinitCheckRequirement clinit_check_requirement) + ClinitCheckRequirement clinit_check_requirement, + bool enable_intrinsic_opt) : HInvoke(kInvokeStaticOrDirect, allocator, number_of_arguments, @@ -4922,7 +4966,8 @@ class HInvokeStaticOrDirect final : public HInvoke { method_reference, resolved_method, resolved_method_reference, - invoke_type), + invoke_type, + enable_intrinsic_opt), dispatch_info_(dispatch_info) { SetPackedField<ClinitCheckRequirementField>(clinit_check_requirement); } @@ -5134,7 +5179,8 @@ class HInvokeVirtual final : public HInvoke { MethodReference method_reference, ArtMethod* resolved_method, MethodReference resolved_method_reference, - uint32_t vtable_index) + uint32_t vtable_index, + bool enable_intrinsic_opt) : HInvoke(kInvokeVirtual, allocator, number_of_arguments, @@ -5144,7 +5190,8 @@ class HInvokeVirtual final : public HInvoke { method_reference, resolved_method, resolved_method_reference, - kVirtual), + kVirtual, + enable_intrinsic_opt), vtable_index_(vtable_index) { } @@ -5196,7 +5243,8 @@ class HInvokeInterface final : public HInvoke { ArtMethod* resolved_method, MethodReference resolved_method_reference, uint32_t imt_index, - MethodLoadKind load_kind) + MethodLoadKind load_kind, + bool enable_intrinsic_opt) : HInvoke(kInvokeInterface, allocator, number_of_arguments + (NeedsCurrentMethod(load_kind) ? 1 : 0), @@ -5206,7 +5254,8 @@ class HInvokeInterface final : public HInvoke { method_reference, resolved_method, resolved_method_reference, - kInterface), + kInterface, + enable_intrinsic_opt), imt_index_(imt_index), hidden_argument_load_kind_(load_kind) { } @@ -5321,7 +5370,7 @@ class HNewArray final : public HExpression<2> { kFieldComponentSizeShift + kFieldComponentSizeShiftSize; static_assert(kNumberOfNewArrayPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using ComponentSizeShiftField = - BitField<size_t, kFieldComponentSizeShift, kFieldComponentSizeShift>; + BitField<size_t, kFieldComponentSizeShift, kFieldComponentSizeShiftSize>; }; class HAdd final : public HBinaryOperation { @@ -6362,6 +6411,27 @@ class HPredicatedInstanceFieldGet final : public HExpression<2> { const FieldInfo field_info_; }; +enum class WriteBarrierKind { + // Emit the write barrier, with a runtime optimization which checks if the value that it is being + // set is null. 
+ kEmitWithNullCheck, + // Emit the write barrier, without the runtime null check optimization. This could be set because: + // A) It is a write barrier for an ArraySet (which does the optimization with the type check, so + // it never does the optimization at the write barrier stage) + // B) We know that the input can't be null + // C) This write barrier is actually several write barriers coalesced into one. Potentially we + // could ask if every value is null for a runtime optimization at the cost of compile time / code + // size. At the time of writing it was deemed not worth the effort. + kEmitNoNullCheck, + // Skip emitting the write barrier. This could be set because: + // A) The write barrier is not needed (e.g. it is not a reference, or the value is the null + // constant) + // B) This write barrier was coalesced into another one so there's no need to emit it. + kDontEmit, + kLast = kDontEmit +}; +std::ostream& operator<<(std::ostream& os, WriteBarrierKind rhs); + class HInstanceFieldSet final : public HExpression<2> { public: HInstanceFieldSet(HInstruction* object, @@ -6386,6 +6456,7 @@ class HInstanceFieldSet final : public HExpression<2> { dex_file) { SetPackedFlag<kFlagValueCanBeNull>(true); SetPackedFlag<kFlagIsPredicatedSet>(false); + SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitWithNullCheck); SetRawInputAt(0, object); SetRawInputAt(1, value); } @@ -6406,6 +6477,12 @@ class HInstanceFieldSet final : public HExpression<2> { void ClearValueCanBeNull() { SetPackedFlag<kFlagValueCanBeNull>(false); } bool GetIsPredicatedSet() const { return GetPackedFlag<kFlagIsPredicatedSet>(); } void SetIsPredicatedSet(bool value = true) { SetPackedFlag<kFlagIsPredicatedSet>(value); } + WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); } + void SetWriteBarrierKind(WriteBarrierKind kind) { + DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck) + << "We shouldn't go back to the original value."; + SetPackedField<WriteBarrierKindField>(kind); + } DECLARE_INSTRUCTION(InstanceFieldSet); @@ -6415,11 +6492,17 @@ class HInstanceFieldSet final : public HExpression<2> { private: static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits; static constexpr size_t kFlagIsPredicatedSet = kFlagValueCanBeNull + 1; - static constexpr size_t kNumberOfInstanceFieldSetPackedBits = kFlagIsPredicatedSet + 1; + static constexpr size_t kWriteBarrierKind = kFlagIsPredicatedSet + 1; + static constexpr size_t kWriteBarrierKindSize = + MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast)); + static constexpr size_t kNumberOfInstanceFieldSetPackedBits = + kWriteBarrierKind + kWriteBarrierKindSize; static_assert(kNumberOfInstanceFieldSetPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); const FieldInfo field_info_; + using WriteBarrierKindField = + BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>; }; class HArrayGet final : public HExpression<2> { @@ -6540,6 +6623,8 @@ class HArraySet final : public HExpression<3> { SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == DataType::Type::kReference); SetPackedFlag<kFlagValueCanBeNull>(true); SetPackedFlag<kFlagStaticTypeOfArrayIsObjectArray>(false); + // ArraySets never do the null check optimization at the write barrier stage. 
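// [Illustrative sketch, not part of this change; write_barrier_elimination.cc itself
// is not shown in this hunk.] Two consecutive reference stores into the same object
// with no safepoint in between can share a single card mark. `first_set` and
// `second_set` are hypothetical HInstanceFieldSet* for
//   obj.f = a;   // first_set
//   obj.g = b;   // second_set
// The first store keeps its barrier but, since it now covers several values, drops
// the value-is-null fast path; the second store does not need a barrier of its own.
first_set->SetWriteBarrierKind(WriteBarrierKind::kEmitNoNullCheck);
second_set->SetWriteBarrierKind(WriteBarrierKind::kDontEmit);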
+ SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitNoNullCheck); SetRawInputAt(0, array); SetRawInputAt(1, index); SetRawInputAt(2, value); @@ -6560,8 +6645,10 @@ class HArraySet final : public HExpression<3> { return false; } - void ClearNeedsTypeCheck() { + void ClearTypeCheck() { SetPackedFlag<kFlagNeedsTypeCheck>(false); + // Clear the `CanTriggerGC` flag too as we can only trigger a GC when doing a type check. + SetSideEffects(GetSideEffects().Exclusion(SideEffects::CanTriggerGC())); } void ClearValueCanBeNull() { @@ -6610,6 +6697,16 @@ class HArraySet final : public HExpression<3> { : SideEffects::None(); } + WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); } + + void SetWriteBarrierKind(WriteBarrierKind kind) { + DCHECK(kind != WriteBarrierKind::kEmitNoNullCheck) + << "We shouldn't go back to the original value."; + DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck) + << "We never do the null check optimization for ArraySets."; + SetPackedField<WriteBarrierKindField>(kind); + } + DECLARE_INSTRUCTION(ArraySet); protected: @@ -6625,11 +6722,16 @@ class HArraySet final : public HExpression<3> { // Cached information for the reference_type_info_ so that codegen // does not need to inspect the static type. static constexpr size_t kFlagStaticTypeOfArrayIsObjectArray = kFlagValueCanBeNull + 1; - static constexpr size_t kNumberOfArraySetPackedBits = - kFlagStaticTypeOfArrayIsObjectArray + 1; + static constexpr size_t kWriteBarrierKind = kFlagStaticTypeOfArrayIsObjectArray + 1; + static constexpr size_t kWriteBarrierKindSize = + MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast)); + static constexpr size_t kNumberOfArraySetPackedBits = kWriteBarrierKind + kWriteBarrierKindSize; static_assert(kNumberOfArraySetPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using ExpectedComponentTypeField = BitField<DataType::Type, kFieldExpectedComponentType, kFieldExpectedComponentTypeSize>; + + using WriteBarrierKindField = + BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>; }; class HArrayLength final : public HExpression<1> { @@ -6714,9 +6816,10 @@ class HBoundsCheck final : public HExpression<2> { class HSuspendCheck final : public HExpression<0> { public: - explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc) + explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc, bool is_no_op = false) : HExpression(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) { + SetPackedFlag<kFlagIsNoOp>(is_no_op); } bool IsClonable() const override { return true; } @@ -6725,6 +6828,10 @@ class HSuspendCheck final : public HExpression<0> { return true; } + void SetIsNoOp(bool is_no_op) { SetPackedFlag<kFlagIsNoOp>(is_no_op); } + bool IsNoOp() const { return GetPackedFlag<kFlagIsNoOp>(); } + + void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; } SlowPathCode* GetSlowPath() const { return slow_path_; } @@ -6733,28 +6840,42 @@ class HSuspendCheck final : public HExpression<0> { protected: DEFAULT_COPY_CONSTRUCTOR(SuspendCheck); + // True if the HSuspendCheck should not emit any code during codegen. It is + // not possible to simply remove this instruction to disable codegen, as + // other optimizations (e.g: CHAGuardVisitor::HoistGuard) depend on + // HSuspendCheck being present in every loop. 
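// [Illustrative sketch, not part of this change.] A code generator honouring the new
// flag can simply skip emission for no-op checks while leaving the instruction in the
// graph; the surrounding visitor and its emission helper are hypothetical.
void VisitSuspendCheck(HSuspendCheck* check) {
  if (check->IsNoOp()) {
    // Only set when HGraph::SuspendChecksAreAllowedToNoOp() held, i.e. the graph is
    // neither debuggable nor compiled for OSR.
    return;  // No code emitted; the HSuspendCheck stays for passes that rely on it.
  }
  EmitSuspendCheckSlowPath(check);  // Hypothetical helper for the usual code path.
}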
+ static constexpr size_t kFlagIsNoOp = kNumberOfGenericPackedBits; + static constexpr size_t kNumberOfSuspendCheckPackedBits = kFlagIsNoOp + 1; + static_assert(kNumberOfSuspendCheckPackedBits <= HInstruction::kMaxNumberOfPackedBits, + "Too many packed fields."); + private: // Only used for code generation, in order to share the same slow path between back edges // of a same loop. SlowPathCode* slow_path_; }; -// Pseudo-instruction which provides the native debugger with mapping information. -// It ensures that we can generate line number and local variables at this point. -class HNativeDebugInfo : public HExpression<0> { +// Pseudo-instruction which doesn't generate any code. +// If `emit_environment` is true, it can be used to generate an environment. It is used, for +// example, to provide the native debugger with mapping information. It ensures that we can generate +// line number and local variables at this point. +class HNop : public HExpression<0> { public: - explicit HNativeDebugInfo(uint32_t dex_pc) - : HExpression<0>(kNativeDebugInfo, SideEffects::None(), dex_pc) { + explicit HNop(uint32_t dex_pc, bool needs_environment) + : HExpression<0>(kNop, SideEffects::None(), dex_pc), needs_environment_(needs_environment) { } bool NeedsEnvironment() const override { - return true; + return needs_environment_; } - DECLARE_INSTRUCTION(NativeDebugInfo); + DECLARE_INSTRUCTION(Nop); protected: - DEFAULT_COPY_CONSTRUCTOR(NativeDebugInfo); + DEFAULT_COPY_CONSTRUCTOR(Nop); + + private: + bool needs_environment_; }; /** @@ -7222,6 +7343,10 @@ class HLoadMethodHandle final : public HInstruction { return SideEffects::CanTriggerGC(); } + bool CanThrow() const override { return true; } + + bool NeedsEnvironment() const override { return true; } + DECLARE_INSTRUCTION(LoadMethodHandle); protected: @@ -7266,6 +7391,10 @@ class HLoadMethodType final : public HInstruction { return SideEffects::CanTriggerGC(); } + bool CanThrow() const override { return true; } + + bool NeedsEnvironment() const override { return true; } + DECLARE_INSTRUCTION(LoadMethodType); protected: @@ -7400,6 +7529,7 @@ class HStaticFieldSet final : public HExpression<2> { declaring_class_def_index, dex_file) { SetPackedFlag<kFlagValueCanBeNull>(true); + SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitWithNullCheck); SetRawInputAt(0, cls); SetRawInputAt(1, value); } @@ -7415,6 +7545,13 @@ class HStaticFieldSet final : public HExpression<2> { bool GetValueCanBeNull() const { return GetPackedFlag<kFlagValueCanBeNull>(); } void ClearValueCanBeNull() { SetPackedFlag<kFlagValueCanBeNull>(false); } + WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); } + void SetWriteBarrierKind(WriteBarrierKind kind) { + DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck) + << "We shouldn't go back to the original value."; + SetPackedField<WriteBarrierKindField>(kind); + } + DECLARE_INSTRUCTION(StaticFieldSet); protected: @@ -7422,25 +7559,34 @@ class HStaticFieldSet final : public HExpression<2> { private: static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits; - static constexpr size_t kNumberOfStaticFieldSetPackedBits = kFlagValueCanBeNull + 1; + static constexpr size_t kWriteBarrierKind = kFlagValueCanBeNull + 1; + static constexpr size_t kWriteBarrierKindSize = + MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast)); + static constexpr size_t kNumberOfStaticFieldSetPackedBits = + kWriteBarrierKind + kWriteBarrierKindSize; static_assert(kNumberOfStaticFieldSetPackedBits <= 
kMaxNumberOfPackedBits, "Too many packed fields."); const FieldInfo field_info_; + using WriteBarrierKindField = + BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>; }; class HStringBuilderAppend final : public HVariableInputSizeInstruction { public: HStringBuilderAppend(HIntConstant* format, uint32_t number_of_arguments, + bool has_fp_args, ArenaAllocator* allocator, uint32_t dex_pc) : HVariableInputSizeInstruction( kStringBuilderAppend, DataType::Type::kReference, - // The runtime call may read memory from inputs. It never writes outside - // of the newly allocated result object (or newly allocated helper objects). - SideEffects::AllReads().Union(SideEffects::CanTriggerGC()), + SideEffects::CanTriggerGC().Union( + // The runtime call may read memory from inputs. It never writes outside + // of the newly allocated result object or newly allocated helper objects, + // except for float/double arguments where we reuse thread-local helper objects. + has_fp_args ? SideEffects::AllWritesAndReads() : SideEffects::AllReads()), dex_pc, allocator, number_of_arguments + /* format */ 1u, @@ -8393,7 +8539,7 @@ class HIntermediateAddress final : public HExpression<2> { #include "nodes_x86.h" #endif -namespace art { +namespace art HIDDEN { class OptimizingCompilerStats; @@ -8457,7 +8603,7 @@ HInstruction* ReplaceInstrOrPhiByClone(HInstruction* instr); // Create a clone for each clonable instructions/phis and replace the original with the clone. // // Used for testing individual instruction cloner. -class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor { +class CloneAndReplaceInstructionVisitor final : public HGraphDelegateVisitor { public: explicit CloneAndReplaceInstructionVisitor(HGraph* graph) : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {} diff --git a/compiler/optimizing/nodes_shared.cc b/compiler/optimizing/nodes_shared.cc index eca97d7a70..b3a7ad9a05 100644 --- a/compiler/optimizing/nodes_shared.cc +++ b/compiler/optimizing/nodes_shared.cc @@ -23,7 +23,7 @@ #include "instruction_simplifier_shared.h" -namespace art { +namespace art HIDDEN { using helpers::CanFitInShifterOperand; diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index 7dcac1787e..27e610328f 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -22,7 +22,7 @@ // (defining `HInstruction` and co). #include "nodes.h" -namespace art { +namespace art HIDDEN { class HMultiplyAccumulate final : public HExpression<3> { public: diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 34f0e9b1e1..29210fe10f 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -17,11 +17,12 @@ #include "nodes.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "optimizing_unit_test.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { class NodeTest : public OptimizingUnitTest {}; diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index a2cd86dc33..73f6c40a0d 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -21,7 +21,7 @@ // is included in the header file nodes.h itself. However it gives editing tools better context. #include "nodes.h" -namespace art { +namespace art HIDDEN { // Memory alignment, represented as an offset relative to a base, where 0 <= offset < base, // and base is a power of two. 
For example, the value Alignment(16, 0) means memory is diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc index b0a665d704..e0a48db84f 100644 --- a/compiler/optimizing/nodes_vector_test.cc +++ b/compiler/optimizing/nodes_vector_test.cc @@ -15,10 +15,11 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "nodes.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { /** * Fixture class for testing vector nodes. diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h index 8e8fbc1581..e246390aa5 100644 --- a/compiler/optimizing/nodes_x86.h +++ b/compiler/optimizing/nodes_x86.h @@ -17,7 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_NODES_X86_H_ #define ART_COMPILER_OPTIMIZING_NODES_X86_H_ -namespace art { +namespace art HIDDEN { // Compute the address of the method for X86 Constant area support. class HX86ComputeBaseMethodAddress final : public HExpression<0> { diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index 2cac38b715..12e9a1046d 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -55,10 +55,11 @@ #include "select_generator.h" #include "sharpening.h" #include "side_effects_analysis.h" +#include "write_barrier_elimination.h" // Decide between default or alternative pass name. -namespace art { +namespace art HIDDEN { const char* OptimizationPassName(OptimizationPass pass) { switch (pass) { @@ -76,6 +77,7 @@ const char* OptimizationPassName(OptimizationPass pass) { return BoundsCheckElimination::kBoundsCheckEliminationPassName; case OptimizationPass::kLoadStoreElimination: return LoadStoreElimination::kLoadStoreEliminationPassName; + case OptimizationPass::kAggressiveConstantFolding: case OptimizationPass::kConstantFolding: return HConstantFolding::kConstantFoldingPassName; case OptimizationPass::kDeadCodeElimination: @@ -95,6 +97,8 @@ const char* OptimizationPassName(OptimizationPass pass) { return ConstructorFenceRedundancyElimination::kCFREPassName; case OptimizationPass::kScheduling: return HInstructionScheduling::kInstructionSchedulingPassName; + case OptimizationPass::kWriteBarrierElimination: + return WriteBarrierElimination::kWBEPassName; #ifdef ART_ENABLE_CODEGEN_arm case OptimizationPass::kInstructionSimplifierArm: return arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName; @@ -194,7 +198,8 @@ ArenaVector<HOptimization*> ConstructOptimizations( opt = most_recent_side_effects = new (allocator) SideEffectsAnalysis(graph, pass_name); break; case OptimizationPass::kInductionVarAnalysis: - opt = most_recent_induction = new (allocator) HInductionVarAnalysis(graph, pass_name); + opt = most_recent_induction = + new (allocator) HInductionVarAnalysis(graph, stats, pass_name); break; // // Passes that need prior analysis. @@ -221,7 +226,11 @@ ArenaVector<HOptimization*> ConstructOptimizations( // Regular passes. 
// case OptimizationPass::kConstantFolding: - opt = new (allocator) HConstantFolding(graph, pass_name); + opt = new (allocator) HConstantFolding(graph, stats, pass_name); + break; + case OptimizationPass::kAggressiveConstantFolding: + opt = new (allocator) + HConstantFolding(graph, stats, pass_name, /* use_all_optimizations_ = */ true); break; case OptimizationPass::kDeadCodeElimination: opt = new (allocator) HDeadCodeElimination(graph, stats, pass_name); @@ -239,6 +248,7 @@ ArenaVector<HOptimization*> ConstructOptimizations( /* total_number_of_instructions= */ 0, /* parent= */ nullptr, /* depth= */ 0, + /* try_catch_inlining_allowed= */ true, pass_name); break; } @@ -267,6 +277,9 @@ ArenaVector<HOptimization*> ConstructOptimizations( case OptimizationPass::kLoadStoreElimination: opt = new (allocator) LoadStoreElimination(graph, stats, pass_name); break; + case OptimizationPass::kWriteBarrierElimination: + opt = new (allocator) WriteBarrierElimination(graph, stats, pass_name); + break; case OptimizationPass::kScheduling: opt = new (allocator) HInstructionScheduling( graph, codegen->GetCompilerOptions().GetInstructionSet(), codegen, pass_name); diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index 2113df0c81..134e3cdc7a 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -18,10 +18,11 @@ #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ #include "base/arena_object.h" +#include "base/macros.h" #include "nodes.h" #include "optimizing_compiler_stats.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class DexCompilationUnit; @@ -42,7 +43,7 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> { // Return the name of the pass. Pass names for a single HOptimization should be of form // <optimization_name> or <optimization_name>$<pass_name> for common <optimization_name> prefix. - // Example: 'instruction_simplifier', 'instruction_simplifier$after_bce', + // Example: 'instruction_simplifier', 'instruction_simplifier$before_codegen', // 'instruction_simplifier$before_codegen'. const char* GetPassName() const { return pass_name_; } @@ -66,6 +67,7 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> { // field is preferred over a string lookup at places where performance matters. // TODO: generate this table and lookup methods below automatically? enum class OptimizationPass { + kAggressiveConstantFolding, kAggressiveInstructionSimplifier, kBoundsCheckElimination, kCHAGuardOptimization, @@ -83,6 +85,7 @@ enum class OptimizationPass { kScheduling, kSelectGenerator, kSideEffectsAnalysis, + kWriteBarrierElimination, #ifdef ART_ENABLE_CODEGEN_arm kInstructionSimplifierArm, kCriticalNativeAbiFixupArm, diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index bad540e03c..f12e748941 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -18,6 +18,7 @@ #include <vector> #include "arch/instruction_set.h" +#include "base/macros.h" #include "base/runtime_debug.h" #include "cfi_test.h" #include "driver/compiler_options.h" @@ -32,7 +33,7 @@ namespace vixl32 = vixl::aarch32; -namespace art { +namespace art HIDDEN { // Run the tests only on host. #ifndef ART_TARGET_ANDROID @@ -167,9 +168,20 @@ TEST_ISA(kThumb2) // barrier configuration, and as such is removed from the set of // callee-save registers in the ARM64 code generator of the Optimizing // compiler. 
-#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) -TEST_ISA(kArm64) -#endif +// +// We can't use compile-time macros for read-barrier as the introduction +// of userfaultfd-GC has made it a runtime choice. +TEST_F(OptimizingCFITest, kArm64) { + if (kUseBakerReadBarrier && gUseReadBarrier) { + std::vector<uint8_t> expected_asm( + expected_asm_kArm64, + expected_asm_kArm64 + arraysize(expected_asm_kArm64)); + std::vector<uint8_t> expected_cfi( + expected_cfi_kArm64, + expected_cfi_kArm64 + arraysize(expected_cfi_kArm64)); + TestImpl(InstructionSet::kArm64, "kArm64", expected_asm, expected_cfi); + } +} #endif #ifdef ART_ENABLE_CODEGEN_x86 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 6eb3d01e42..00eb6e5c42 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -33,12 +33,11 @@ #include "base/timing_logger.h" #include "builder.h" #include "code_generator.h" -#include "compiled_method.h" #include "compiler.h" #include "debug/elf_debug_writer.h" #include "debug/method_debug_info.h" #include "dex/dex_file_types.h" -#include "driver/compiled_method_storage.h" +#include "driver/compiled_code_storage.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "graph_checker.h" @@ -52,6 +51,7 @@ #include "linker/linker_patch.h" #include "nodes.h" #include "oat_quick_method_header.h" +#include "optimizing/write_barrier_elimination.h" #include "prepare_for_register_allocation.h" #include "reference_type_propagation.h" #include "register_allocator_linear_scan.h" @@ -62,7 +62,7 @@ #include "stack_map_stream.h" #include "utils/assembler.h" -namespace art { +namespace art HIDDEN { static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB; @@ -269,7 +269,7 @@ class PassScope : public ValueObject { class OptimizingCompiler final : public Compiler { public: explicit OptimizingCompiler(const CompilerOptions& compiler_options, - CompiledMethodStorage* storage); + CompiledCodeStorage* storage); ~OptimizingCompiler() override; bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const override; @@ -359,11 +359,11 @@ class OptimizingCompiler final : public Compiler { const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer) const; - private: // Create a 'CompiledMethod' for an optimized graph. CompiledMethod* Emit(ArenaAllocator* allocator, CodeVectorAllocator* code_allocator, CodeGenerator* codegen, + bool is_intrinsic, const dex::CodeItem* item) const; // Try compiling a method and return the code generator used for @@ -413,7 +413,7 @@ class OptimizingCompiler final : public Compiler { static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */ OptimizingCompiler::OptimizingCompiler(const CompilerOptions& compiler_options, - CompiledMethodStorage* storage) + CompiledCodeStorage* storage) : Compiler(compiler_options, storage, kMaximumCompilationTimeBeforeWarning) { // Enable C1visualizer output. const std::string& cfg_file_name = compiler_options.GetDumpCfgFileName(); @@ -568,6 +568,9 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, } #endif default: + UNUSED(graph); + UNUSED(dex_compilation_unit); + UNUSED(pass_observer); return false; } } @@ -653,7 +656,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, OptDef(OptimizationPass::kGlobalValueNumbering), // Simplification (TODO: only if GVN occurred). 
OptDef(OptimizationPass::kSelectGenerator), - OptDef(OptimizationPass::kConstantFolding, + OptDef(OptimizationPass::kAggressiveConstantFolding, "constant_folding$after_gvn"), OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$after_gvn"), @@ -668,20 +671,27 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, OptDef(OptimizationPass::kLoopOptimization), // Simplification. OptDef(OptimizationPass::kConstantFolding, - "constant_folding$after_bce"), + "constant_folding$after_loop_opt"), OptDef(OptimizationPass::kAggressiveInstructionSimplifier, - "instruction_simplifier$after_bce"), + "instruction_simplifier$after_loop_opt"), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_loop_opt"), // Other high-level optimizations. OptDef(OptimizationPass::kLoadStoreElimination), OptDef(OptimizationPass::kCHAGuardOptimization), - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$final"), OptDef(OptimizationPass::kCodeSinking), + // Simplification. + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$before_codegen"), // The codegen has a few assumptions that only the instruction simplifier // can satisfy. For example, the code generator does not expect to see a // HTypeConversion from a type to the same type. OptDef(OptimizationPass::kAggressiveInstructionSimplifier, "instruction_simplifier$before_codegen"), + // Simplification may result in dead code that should be removed prior to + // code generation. + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$before_codegen"), // Eliminate constructor fences after code sinking to avoid // complicated sinking logic to split a fence with many inputs. OptDef(OptimizationPass::kConstructorFenceRedundancyElimination) @@ -711,18 +721,19 @@ static ArenaVector<linker::LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, CodeVectorAllocator* code_allocator, CodeGenerator* codegen, + bool is_intrinsic, const dex::CodeItem* code_item_for_osr_check) const { ArenaVector<linker::LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); ScopedArenaVector<uint8_t> stack_map = codegen->BuildStackMaps(code_item_for_osr_check); - CompiledMethodStorage* storage = GetCompiledMethodStorage(); - CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( - storage, + CompiledCodeStorage* storage = GetCompiledCodeStorage(); + CompiledMethod* compiled_method = storage->CreateCompiledMethod( codegen->GetInstructionSet(), code_allocator->GetMemory(), ArrayRef<const uint8_t>(stack_map), ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), - ArrayRef<const linker::LinkerPatch>(linker_patches)); + ArrayRef<const linker::LinkerPatch>(linker_patches), + is_intrinsic); for (const linker::LinkerPatch& patch : linker_patches) { if (codegen->NeedsThunkCode(patch) && storage->GetThunkCode(patch).empty()) { @@ -891,6 +902,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, RunBaselineOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer); } else { RunOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer); + PassScope scope(WriteBarrierElimination::kWBEPassName, &pass_observer); + WriteBarrierElimination(graph, compilation_stats_.get()).Run(); } RegisterAllocator::Strategy regalloc_strategy = @@ -984,6 +997,10 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( optimizations); RunArchOptimizations(graph, 
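Write-barrier elimination is not added to the OptDef table; it is run by hand under a PassScope right after the main pipeline. As a hedged illustration of the scope idea only (an invented PassTimer, not ART's PassScope/PassObserver), an RAII wrapper that brackets a single pass looks roughly like this:

    // Sketch of a scope-based pass observer: entering the scope announces the
    // pass, leaving it reports completion, and the pass runs in between.
    #include <chrono>
    #include <cstdio>
    #include <string>

    class PassTimer {
     public:
      explicit PassTimer(std::string name)
          : name_(std::move(name)), start_(std::chrono::steady_clock::now()) {
        std::printf("begin pass: %s\n", name_.c_str());
      }
      ~PassTimer() {
        auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                      std::chrono::steady_clock::now() - start_).count();
        std::printf("end pass: %s (%lld us)\n", name_.c_str(), static_cast<long long>(us));
      }
     private:
      std::string name_;
      std::chrono::steady_clock::time_point start_;
    };

    struct ToyPass { void Run() { /* ... transform the graph ... */ } };

    int main() {
      {
        PassTimer scope("write_barrier_elimination");
        ToyPass().Run();
      }  // completion reported here, as PassScope's destructor would
    }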
codegen.get(), dex_compilation_unit, &pass_observer); + { + PassScope scope(WriteBarrierElimination::kWBEPassName, &pass_observer); + WriteBarrierElimination(graph, compilation_stats_.get()).Run(); + } AllocateRegisters(graph, codegen.get(), @@ -1079,10 +1096,8 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, compiled_method = Emit(&allocator, &code_allocator, codegen.get(), + compiled_intrinsic, compiled_intrinsic ? nullptr : code_item); - if (compiled_intrinsic) { - compiled_method->MarkAsIntrinsic(); - } if (kArenaAllocatorCountAllocations) { codegen.reset(); // Release codegen's ScopedArenaAllocator for memory accounting. @@ -1115,17 +1130,18 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, static ScopedArenaVector<uint8_t> CreateJniStackMap(ScopedArenaAllocator* allocator, const JniCompiledMethod& jni_compiled_method, - size_t code_size) { + size_t code_size, + bool debuggable) { // StackMapStream is quite large, so allocate it using the ScopedArenaAllocator // to stay clear of the frame size limit. std::unique_ptr<StackMapStream> stack_map_stream( new (allocator) StackMapStream(allocator, jni_compiled_method.GetInstructionSet())); - stack_map_stream->BeginMethod( - jni_compiled_method.GetFrameSize(), - jni_compiled_method.GetCoreSpillMask(), - jni_compiled_method.GetFpSpillMask(), - /* num_dex_registers= */ 0, - /* baseline= */ false); + stack_map_stream->BeginMethod(jni_compiled_method.GetFrameSize(), + jni_compiled_method.GetCoreSpillMask(), + jni_compiled_method.GetFpSpillMask(), + /* num_dex_registers= */ 0, + /* baseline= */ false, + debuggable); stack_map_stream->EndMethod(code_size); return stack_map_stream->Encode(); } @@ -1172,12 +1188,11 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, method, &handles)); if (codegen != nullptr) { - CompiledMethod* compiled_method = Emit(&allocator, - &code_allocator, - codegen.get(), - /* item= */ nullptr); - compiled_method->MarkAsIntrinsic(); - return compiled_method; + return Emit(&allocator, + &code_allocator, + codegen.get(), + /*is_intrinsic=*/ true, + /*item=*/ nullptr); } } } @@ -1187,19 +1202,22 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledNativeStub); ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map. 
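CreateJniStackMap now takes an explicit debuggable bit, and the callers compute it as GetDebuggable() && IsJitCompiler(). A small sketch of threading that predicate through instead of re-deriving it in the helper (toy options type, not ART's CompilerOptions):

    // Sketch: compute the "debuggable stack map" bit once at the call site
    // and pass it down, matching the expression used in the hunks above.
    #include <cassert>

    struct ToyCompilerOptions {
      bool debuggable = false;
      bool jit_compiler = false;
      bool GetDebuggable() const { return debuggable; }
      bool IsJitCompiler() const { return jit_compiler; }
    };

    bool NeedsDebuggableStackMap(const ToyCompilerOptions& opts) {
      // Only the JIT in a debuggable runtime sets the bit; other
      // configurations keep the previous behaviour (false).
      return opts.GetDebuggable() && opts.IsJitCompiler();
    }

    int main() {
      assert(!NeedsDebuggableStackMap({/*debuggable=*/true, /*jit_compiler=*/false}));
      assert(NeedsDebuggableStackMap({/*debuggable=*/true, /*jit_compiler=*/true}));
    }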
- ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap( - &stack_map_allocator, jni_compiled_method, jni_compiled_method.GetCode().size()); - return CompiledMethod::SwapAllocCompiledMethod( - GetCompiledMethodStorage(), + ScopedArenaVector<uint8_t> stack_map = + CreateJniStackMap(&stack_map_allocator, + jni_compiled_method, + jni_compiled_method.GetCode().size(), + compiler_options.GetDebuggable() && compiler_options.IsJitCompiler()); + return GetCompiledCodeStorage()->CreateCompiledMethod( jni_compiled_method.GetInstructionSet(), jni_compiled_method.GetCode(), ArrayRef<const uint8_t>(stack_map), jni_compiled_method.GetCfi(), - /* patches= */ ArrayRef<const linker::LinkerPatch>()); + /*patches=*/ ArrayRef<const linker::LinkerPatch>(), + /*is_intrinsic=*/ false); } Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options, - CompiledMethodStorage* storage) { + CompiledCodeStorage* storage) { return new OptimizingCompiler(compiler_options, storage); } @@ -1233,6 +1251,19 @@ bool OptimizingCompiler::JitCompile(Thread* self, ArenaAllocator allocator(runtime->GetJitArenaPool()); if (UNLIKELY(method->IsNative())) { + // Use GenericJniTrampoline for critical native methods in debuggable runtimes. We don't + // support calling method entry / exit hooks for critical native methods yet. + // TODO(mythria): Add support for calling method entry / exit hooks in JITed stubs for critical + // native methods too. + if (compiler_options.GetDebuggable() && method->IsCriticalNative()) { + DCHECK(compiler_options.IsJitCompiler()); + return false; + } + // Java debuggable runtimes should set compiler options to debuggable, so that we either + // generate method entry / exit hooks or skip JITing. For critical native methods we don't + // generate method entry / exit hooks so we shouldn't JIT them in debuggable runtimes. + DCHECK_IMPLIES(method->IsCriticalNative(), !runtime->IsJavaDebuggable()); + JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod( compiler_options, access_flags, method_idx, *dex_file, &allocator); std::vector<Handle<mirror::Object>> roots; @@ -1241,8 +1272,11 @@ bool OptimizingCompiler::JitCompile(Thread* self, ArenaStack arena_stack(runtime->GetJitArenaPool()); // StackMapStream is large and it does not fit into this frame, so we need helper method. ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map. 
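JitCompile above now refuses to JIT @CriticalNative methods when the runtime is debuggable, because method entry/exit hooks are not generated for them yet. A standalone sketch of that kind of early-out gate (toy types, not ART's ArtMethod or CompilerOptions):

    // Sketch: decide whether a native method may be JIT-compiled, bailing out
    // for critical natives under a debuggable JIT.
    #include <cassert>

    struct ToyMethod {
      bool is_native = false;
      bool is_critical_native = false;
    };

    bool CanJitNativeStub(const ToyMethod& m, bool debuggable_jit) {
      if (!m.is_native) {
        return true;  // not a native stub, handled by the regular path
      }
      if (debuggable_jit && m.is_critical_native) {
        // Entry/exit hooks are not generated for critical natives, so fall
        // back to the generic trampoline instead of JITing a stub.
        return false;
      }
      return true;
    }

    int main() {
      ToyMethod critical{/*is_native=*/true, /*is_critical_native=*/true};
      assert(!CanJitNativeStub(critical, /*debuggable_jit=*/true));
      assert(CanJitNativeStub(critical, /*debuggable_jit=*/false));
    }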
- ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap( - &stack_map_allocator, jni_compiled_method, jni_compiled_method.GetCode().size()); + ScopedArenaVector<uint8_t> stack_map = + CreateJniStackMap(&stack_map_allocator, + jni_compiled_method, + jni_compiled_method.GetCode().size(), + compiler_options.GetDebuggable() && compiler_options.IsJitCompiler()); ArrayRef<const uint8_t> reserved_code; ArrayRef<const uint8_t> reserved_data; diff --git a/compiler/optimizing/optimizing_compiler.h b/compiler/optimizing/optimizing_compiler.h index cd6d684590..737ffd034a 100644 --- a/compiler/optimizing/optimizing_compiler.h +++ b/compiler/optimizing/optimizing_compiler.h @@ -18,18 +18,19 @@ #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_ #include "base/globals.h" +#include "base/macros.h" #include "base/mutex.h" -namespace art { +namespace art HIDDEN { class ArtMethod; +class CompiledCodeStorage; class Compiler; -class CompiledMethodStorage; class CompilerOptions; class DexFile; Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options, - CompiledMethodStorage* storage); + CompiledCodeStorage* storage); bool EncodeArtMethodInInlineInfo(ArtMethod* method); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index d458e42608..a1d0a5a845 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -26,8 +26,9 @@ #include "base/atomic.h" #include "base/globals.h" +#include "base/macros.h" -namespace art { +namespace art HIDDEN { enum class MethodCompilationStat { kAttemptBytecodeCompilation = 0, @@ -46,6 +47,7 @@ enum class MethodCompilationStat { kUnresolvedFieldNotAFastAccess, kRemovedCheckedCast, kRemovedDeadInstruction, + kRemovedTry, kRemovedNullCheck, kNotCompiledSkipped, kNotCompiledInvalidBytecode, @@ -59,6 +61,7 @@ enum class MethodCompilationStat { kNotCompiledSpaceFilter, kNotCompiledUnhandledInstruction, kNotCompiledUnsupportedIsa, + kNotCompiledInliningIrreducibleLoop, kNotCompiledIrreducibleLoopAndStringInit, kNotCompiledPhiEquivalentInOsr, kInlinedMonomorphicCall, @@ -73,11 +76,13 @@ enum class MethodCompilationStat { kLoopVectorizedIdiom, kSelectGenerated, kRemovedInstanceOf, + kPropagatedIfValue, kInlinedInvokeVirtualOrInterface, kInlinedLastInvokeVirtualOrInterface, kImplicitNullCheckGenerated, kExplicitNullCheckGenerated, kSimplifyIf, + kSimplifyIfAddedPhi, kSimplifyThrowingInvoke, kInstructionSunk, kNotInlinedUnresolvedEntrypoint, @@ -88,16 +93,19 @@ enum class MethodCompilationStat { kNotInlinedEnvironmentBudget, kNotInlinedInstructionBudget, kNotInlinedLoopWithoutExit, - kNotInlinedIrreducibleLoop, + kNotInlinedIrreducibleLoopCallee, + kNotInlinedIrreducibleLoopCaller, kNotInlinedAlwaysThrows, kNotInlinedInfiniteLoop, - kNotInlinedTryCatchCaller, kNotInlinedTryCatchCallee, + kNotInlinedTryCatchDisabled, kNotInlinedRegisterAllocator, kNotInlinedCannotBuild, + kNotInlinedNeverInlineAnnotation, kNotInlinedNotCompilable, kNotInlinedNotVerified, kNotInlinedCodeItem, + kNotInlinedEndsWithThrow, kNotInlinedWont, kNotInlinedRecursiveBudget, kNotInlinedPolymorphicRecursiveBudget, @@ -105,12 +113,15 @@ enum class MethodCompilationStat { kNotInlinedUnresolved, kNotInlinedPolymorphic, kNotInlinedCustom, + kNotVarAnalyzedPathological, kTryInline, kConstructorFenceGeneratedNew, kConstructorFenceGeneratedFinal, kConstructorFenceRemovedLSE, kConstructorFenceRemovedPFRA, kConstructorFenceRemovedCFRE, + kPossibleWriteBarrier, + kRemovedWriteBarrier, 
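The stats enum gains kPossibleWriteBarrier and kRemovedWriteBarrier entries so the new pass can report how many barrier sites it saw and how many it dropped. A minimal sketch of enum-indexed atomic counters of that shape (toy names, not ART's OptimizingCompilerStats):

    // Sketch: an enum-indexed array of atomic counters, bumped by a pass and
    // dumped after compilation.
    #include <array>
    #include <atomic>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    enum class ToyStat : std::size_t {
      kPossibleWriteBarrier,
      kRemovedWriteBarrier,
      kCount  // keep last
    };

    class ToyStats {
     public:
      void RecordStat(ToyStat stat, uint32_t count = 1) {
        counters_[static_cast<std::size_t>(stat)].fetch_add(count, std::memory_order_relaxed);
      }
      void Dump() const {
        std::printf("possible write barriers: %u\n",
                    static_cast<unsigned>(counters_[static_cast<std::size_t>(ToyStat::kPossibleWriteBarrier)]));
        std::printf("removed write barriers:  %u\n",
                    static_cast<unsigned>(counters_[static_cast<std::size_t>(ToyStat::kRemovedWriteBarrier)]));
      }
     private:
      std::array<std::atomic<uint32_t>, static_cast<std::size_t>(ToyStat::kCount)> counters_{};
    };

    int main() {
      ToyStats stats;
      stats.RecordStat(ToyStat::kPossibleWriteBarrier, 3);
      stats.RecordStat(ToyStat::kRemovedWriteBarrier, 2);
      stats.Dump();  // a pass typically removes only a subset of the candidates
    }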
kBitstringTypeCheck, kJitOutOfMemoryForCommit, kFullLSEAllocationRemoved, diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index e83688039a..2e05c41f01 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -25,6 +25,7 @@ #include <vector> #include <variant> +#include "base/macros.h" #include "base/indenter.h" #include "base/malloc_arena_pool.h" #include "base/scoped_arena_allocator.h" @@ -46,7 +47,7 @@ #include "ssa_builder.h" #include "ssa_liveness_analysis.h" -namespace art { +namespace art HIDDEN { #define NUM_INSTRUCTIONS(...) \ (sizeof((uint16_t[]) {__VA_ARGS__}) /sizeof(uint16_t)) @@ -240,13 +241,14 @@ class OptimizingUnitTestHelper { // Create the dex file based on the fake data. Call the constructor so that we can use virtual // functions. Don't use the arena for the StandardDexFile otherwise the dex location leaks. - dex_files_.emplace_back(new StandardDexFile( - dex_data, - sizeof(StandardDexFile::Header), - "no_location", - /*location_checksum*/ 0, - /*oat_dex_file*/ nullptr, - /*container*/ nullptr)); + auto container = + std::make_shared<MemoryDexFileContainer>(dex_data, sizeof(StandardDexFile::Header)); + dex_files_.emplace_back(new StandardDexFile(dex_data, + sizeof(StandardDexFile::Header), + "no_location", + /*location_checksum*/ 0, + /*oat_dex_file*/ nullptr, + std::move(container))); graph_ = new (allocator) HGraph( allocator, @@ -260,9 +262,10 @@ class OptimizingUnitTestHelper { // Create a control-flow graph from Dex instructions. HGraph* CreateCFG(const std::vector<uint16_t>& data, - DataType::Type return_type = DataType::Type::kInt32, - VariableSizedHandleScope* handles = nullptr) { - HGraph* graph = CreateGraph(handles); + DataType::Type return_type = DataType::Type::kInt32) { + ScopedObjectAccess soa(Thread::Current()); + VariableSizedHandleScope handles(soa.Self()); + HGraph* graph = CreateGraph(&handles); // The code item data might not aligned to 4 bytes, copy it to ensure that. const size_t code_item_size = data.size() * sizeof(data.front()); @@ -278,7 +281,7 @@ class OptimizingUnitTestHelper { /* class_linker= */ nullptr, graph->GetDexFile(), code_item, - /* class_def_index= */ DexFile::kDexNoIndex16, + /* class_def_idx= */ DexFile::kDexNoIndex16, /* method_idx= */ dex::kDexNoIndex, /* access_flags= */ 0u, /* verified_method= */ nullptr, @@ -320,25 +323,10 @@ class OptimizingUnitTestHelper { // Run GraphChecker with all checks. // // Return: the status whether the run is successful. - bool CheckGraph(HGraph* graph, std::ostream& oss = std::cerr) { - return CheckGraph(graph, /*check_ref_type_info=*/true, oss); - } - bool CheckGraph(std::ostream& oss = std::cerr) { return CheckGraph(graph_, oss); } - // Run GraphChecker with all checks except reference type information checks. - // - // Return: the status whether the run is successful. 
- bool CheckGraphSkipRefTypeInfoChecks(HGraph* graph, std::ostream& oss = std::cerr) { - return CheckGraph(graph, /*check_ref_type_info=*/false, oss); - } - - bool CheckGraphSkipRefTypeInfoChecks(std::ostream& oss = std::cerr) { - return CheckGraphSkipRefTypeInfoChecks(graph_, oss); - } - HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction, ArenaVector<HInstruction*>* current_locals) { HEnvironment* environment = new (GetAllocator()) HEnvironment( @@ -473,7 +461,8 @@ class OptimizingUnitTestHelper { HInvokeStaticOrDirect::DispatchInfo{}, InvokeType::kStatic, /* resolved_method_reference= */ method_reference, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); for (auto [ins, idx] : ZipCount(MakeIterationRange(args))) { res->SetRawInputAt(idx, ins); } @@ -531,9 +520,8 @@ class OptimizingUnitTestHelper { } protected: - bool CheckGraph(HGraph* graph, bool check_ref_type_info, std::ostream& oss) { + bool CheckGraph(HGraph* graph, std::ostream& oss) { GraphChecker checker(graph); - checker.SetRefTypeInfoCheckEnabled(check_ref_type_info); checker.Run(); checker.Dump(oss); return checker.IsValid(); @@ -559,7 +547,7 @@ class OptimizingUnitTestHelper { class OptimizingUnitTest : public CommonArtTest, public OptimizingUnitTestHelper {}; // Naive string diff data type. -typedef std::list<std::pair<std::string, std::string>> diff_t; +using diff_t = std::list<std::pair<std::string, std::string>>; // An alias for the empty string used to make it clear that a line is // removed in a diff. @@ -586,7 +574,7 @@ inline std::ostream& operator<<(std::ostream& oss, const AdjacencyListGraph& alg return alg.Dump(oss); } -class PatternMatchGraphVisitor : public HGraphVisitor { +class PatternMatchGraphVisitor final : public HGraphVisitor { private: struct HandlerWrapper { public: diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 2036b4a370..9fc4cc86bf 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -19,7 +19,7 @@ #include "base/stl_util.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { // Perform a linear sweep of the moves to add them to the initial list of diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 5fadcab402..17d5122542 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -18,11 +18,12 @@ #define ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_ #include "base/arena_containers.h" +#include "base/macros.h" #include "base/value_object.h" #include "data_type.h" #include "locations.h" -namespace art { +namespace art HIDDEN { class HParallelMove; class MoveOperands; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index a8ab6cdd0c..a1c05e9cad 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "base/malloc_arena_pool.h" #include "nodes.h" #include "parallel_move_resolver.h" @@ -22,7 +23,7 @@ #include "gtest/gtest-typed-test.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { constexpr int kScratchRegisterStartIndexForTest = 100; diff --git 
a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index 17f37f05c5..d3da3d3ce1 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -18,13 +18,13 @@ #include "code_generator_x86.h" #include "intrinsics_x86.h" -namespace art { +namespace art HIDDEN { namespace x86 { /** * Finds instructions that need the constant area base as an input. */ -class PCRelativeHandlerVisitor : public HGraphVisitor { +class PCRelativeHandlerVisitor final : public HGraphVisitor { public: PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen) : HGraphVisitor(graph), diff --git a/compiler/optimizing/pc_relative_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h index 3b470a6502..45578d8050 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.h +++ b/compiler/optimizing/pc_relative_fixups_x86.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_ #define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index c2f3d0e741..398b10abf3 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -22,7 +22,7 @@ #include "optimizing_compiler_stats.h" #include "well_known_classes.h" -namespace art { +namespace art HIDDEN { void PrepareForRegisterAllocation::Run() { // Order does not matter. @@ -83,7 +83,7 @@ void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) { if (check->IsStringCharAt()) { // Add a fake environment for String.charAt() inline info as we want the exception // to appear as being thrown from there. Skip if we're compiling String.charAt() itself. - ArtMethod* char_at_method = jni::DecodeArtMethod(WellKnownClasses::java_lang_String_charAt); + ArtMethod* char_at_method = WellKnownClasses::java_lang_String_charAt; if (GetGraph()->GetArtMethod() != char_at_method) { ArenaAllocator* allocator = GetGraph()->GetAllocator(); HEnvironment* environment = new (allocator) HEnvironment(allocator, @@ -109,7 +109,7 @@ void PrepareForRegisterAllocation::VisitArraySet(HArraySet* instruction) { if (value->IsNullConstant()) { DCHECK_EQ(value->GetType(), DataType::Type::kReference); if (instruction->NeedsTypeCheck()) { - instruction->ClearNeedsTypeCheck(); + instruction->ClearTypeCheck(); } } } @@ -295,15 +295,16 @@ bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input, return false; } - // In debug mode, check that we have not inserted a throwing instruction - // or an instruction with side effects between input and user. - if (kIsDebugBuild) { - for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) { - CHECK(between != nullptr); // User must be after input in the same block. - CHECK(!between->CanThrow()) << *between << " User: " << *user; - CHECK(!between->HasSideEffects()) << *between << " User: " << *user; + // If there's a instruction between them that can throw or it has side effects, we cannot move the + // responsibility. + for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) { + DCHECK(between != nullptr) << " User must be after input in the same block. 
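CanMoveClinitCheck above changes from a debug-only consistency CHECK into an actual bail-out: if anything between the input and the user can throw or has side effects, the move is refused. A toy version of that linear scan (an invented ToyInstruction, not HInstruction):

    // Sketch: walk the instructions strictly between `input` and `user` and
    // refuse the transformation if any of them can throw or has side effects.
    #include <cassert>

    struct ToyInstruction {
      bool can_throw = false;
      bool has_side_effects = false;
      ToyInstruction* next = nullptr;
    };

    bool NothingProblematicBetween(const ToyInstruction* input, const ToyInstruction* user) {
      for (const ToyInstruction* cur = input->next; cur != user; cur = cur->next) {
        assert(cur != nullptr && "user must come after input in the same block");
        if (cur->can_throw || cur->has_side_effects) {
          return false;
        }
      }
      return true;
    }

    int main() {
      ToyInstruction a, b, c;
      a.next = &b;
      b.next = &c;
      assert(NothingProblematicBetween(&a, &c));
      b.can_throw = true;
      assert(!NothingProblematicBetween(&a, &c));
    }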
input: " << *input + << ", user: " << *user; + if (between->CanThrow() || between->HasSideEffects()) { + return false; } } + return true; } diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index e0bb76eb22..0426f8470b 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_PREPARE_FOR_REGISTER_ALLOCATION_H_ #define ART_COMPILER_OPTIMIZING_PREPARE_FOR_REGISTER_ALLOCATION_H_ +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class CompilerOptions; class OptimizingCompilerStats; diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h index 8ef9ce4e8b..77ddb97707 100644 --- a/compiler/optimizing/pretty_printer.h +++ b/compiler/optimizing/pretty_printer.h @@ -19,9 +19,10 @@ #include "android-base/stringprintf.h" +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class HPrettyPrinter : public HGraphVisitor { public: diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc index 6ef386b4a5..90d5f8f08f 100644 --- a/compiler/optimizing/pretty_printer_test.cc +++ b/compiler/optimizing/pretty_printer_test.cc @@ -17,6 +17,7 @@ #include "pretty_printer.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" @@ -25,9 +26,9 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { -class PrettyPrinterTest : public OptimizingUnitTest { +class PrettyPrinterTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void TestCode(const std::vector<uint16_t>& data, const char* expected); }; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index e6024b08cb..91bae5f49b 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -29,7 +29,7 @@ #include "mirror/dex_cache.h" #include "scoped_thread_state_change-inl.h" -namespace art { +namespace art HIDDEN { static inline ObjPtr<mirror::DexCache> FindDexCacheWithHint( Thread* self, const DexFile& dex_file, Handle<mirror::DexCache> hint_dex_cache) @@ -41,18 +41,14 @@ static inline ObjPtr<mirror::DexCache> FindDexCacheWithHint( } } -class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { +class ReferenceTypePropagation::RTPVisitor final : public HGraphDelegateVisitor { public: - RTPVisitor(HGraph* graph, - Handle<mirror::ClassLoader> class_loader, - Handle<mirror::DexCache> hint_dex_cache, - bool is_first_run) - : HGraphDelegateVisitor(graph), - class_loader_(class_loader), - hint_dex_cache_(hint_dex_cache), - allocator_(graph->GetArenaStack()), - worklist_(allocator_.Adapter(kArenaAllocReferenceTypePropagation)), - is_first_run_(is_first_run) { + RTPVisitor(HGraph* graph, Handle<mirror::DexCache> hint_dex_cache, bool is_first_run) + : HGraphDelegateVisitor(graph), + hint_dex_cache_(hint_dex_cache), + allocator_(graph->GetArenaStack()), + worklist_(allocator_.Adapter(kArenaAllocReferenceTypePropagation)), + is_first_run_(is_first_run) { worklist_.reserve(kDefaultWorklistSize); } @@ -110,7 +106,6 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { static constexpr size_t kDefaultWorklistSize = 8; - Handle<mirror::ClassLoader> class_loader_; 
Handle<mirror::DexCache> hint_dex_cache_; // Use local allocator for allocating memory. @@ -122,63 +117,18 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { }; ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph, - Handle<mirror::ClassLoader> class_loader, Handle<mirror::DexCache> hint_dex_cache, bool is_first_run, const char* name) - : HOptimization(graph, name), - class_loader_(class_loader), - hint_dex_cache_(hint_dex_cache), - is_first_run_(is_first_run) { -} - -void ReferenceTypePropagation::ValidateTypes() { - // TODO: move this to the graph checker. Note: There may be no Thread for gtests. - if (kIsDebugBuild && Thread::Current() != nullptr) { - ScopedObjectAccess soa(Thread::Current()); - for (HBasicBlock* block : graph_->GetReversePostOrder()) { - for (HInstructionIterator iti(block->GetInstructions()); !iti.Done(); iti.Advance()) { - HInstruction* instr = iti.Current(); - if (instr->GetType() == DataType::Type::kReference) { - DCHECK(instr->GetReferenceTypeInfo().IsValid()) - << "Invalid RTI for instruction: " << instr->DebugName(); - if (instr->IsBoundType()) { - DCHECK(instr->AsBoundType()->GetUpperBound().IsValid()); - } else if (instr->IsLoadClass()) { - HLoadClass* cls = instr->AsLoadClass(); - DCHECK(cls->GetReferenceTypeInfo().IsExact()); - DCHECK_IMPLIES(cls->GetLoadedClassRTI().IsValid(), cls->GetLoadedClassRTI().IsExact()); - } else if (instr->IsNullCheck()) { - DCHECK(instr->GetReferenceTypeInfo().IsEqual(instr->InputAt(0)->GetReferenceTypeInfo())) - << "NullCheck " << instr->GetReferenceTypeInfo() - << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo(); - } - } else if (instr->IsInstanceOf()) { - HInstanceOf* iof = instr->AsInstanceOf(); - DCHECK_IMPLIES(iof->GetTargetClassRTI().IsValid(), iof->GetTargetClassRTI().IsExact()); - } else if (instr->IsCheckCast()) { - HCheckCast* check = instr->AsCheckCast(); - DCHECK_IMPLIES(check->GetTargetClassRTI().IsValid(), - check->GetTargetClassRTI().IsExact()); - } - } - } - } -} + : HOptimization(graph, name), hint_dex_cache_(hint_dex_cache), is_first_run_(is_first_run) {} void ReferenceTypePropagation::Visit(HInstruction* instruction) { - RTPVisitor visitor(graph_, - class_loader_, - hint_dex_cache_, - is_first_run_); + RTPVisitor visitor(graph_, hint_dex_cache_, is_first_run_); instruction->Accept(&visitor); } void ReferenceTypePropagation::Visit(ArrayRef<HInstruction* const> instructions) { - RTPVisitor visitor(graph_, - class_loader_, - hint_dex_cache_, - is_first_run_); + RTPVisitor visitor(graph_, hint_dex_cache_, is_first_run_); for (HInstruction* instruction : instructions) { if (instruction->IsPhi()) { // Need to force phis to recalculate null-ness. @@ -349,7 +299,10 @@ static void BoundTypeForClassCheck(HInstruction* check) { } bool ReferenceTypePropagation::Run() { - RTPVisitor visitor(graph_, class_loader_, hint_dex_cache_, is_first_run_); + DCHECK(Thread::Current() != nullptr) + << "ReferenceTypePropagation requires the use of Thread::Current(). Make sure you have a " + << "Runtime initialized before calling this optimization pass"; + RTPVisitor visitor(graph_, hint_dex_cache_, is_first_run_); // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. 
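Run() above walks the graph in reverse post order precisely because that order visits every block after its dominators. A self-contained sketch of computing such an order for a tiny diamond CFG (toy block type, not HBasicBlock):

    // Sketch: a DFS post order, reversed, yields an order in which each block
    // appears after all of its dominators.
    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct ToyBlock {
      int id;
      std::vector<ToyBlock*> successors;
      bool visited = false;
    };

    void PostOrder(ToyBlock* b, std::vector<ToyBlock*>* out) {
      b->visited = true;
      for (ToyBlock* succ : b->successors) {
        if (!succ->visited) PostOrder(succ, out);
      }
      out->push_back(b);
    }

    int main() {
      // entry -> {then, else} -> merge
      ToyBlock entry{0, {}}, then_b{1, {}}, else_b{2, {}}, merge{3, {}};
      entry.successors = {&then_b, &else_b};
      then_b.successors = {&merge};
      else_b.successors = {&merge};

      std::vector<ToyBlock*> order;
      PostOrder(&entry, &order);
      std::reverse(order.begin(), order.end());  // reverse post order

      for (ToyBlock* b : order) std::printf("%d ", b->id);
      std::printf("\n");  // prints "0 2 1 3": entry first, merge after both branches
    }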
@@ -359,7 +312,6 @@ bool ReferenceTypePropagation::Run() { } visitor.ProcessWorklist(); - ValidateTypes(); return true; } @@ -446,10 +398,13 @@ static bool MatchIfInstanceOf(HIf* ifInstruction, if (rhs->AsIntConstant()->IsTrue()) { // Case (1a) *trueBranch = ifInstruction->IfTrueSuccessor(); - } else { + } else if (rhs->AsIntConstant()->IsFalse()) { // Case (2a) - DCHECK(rhs->AsIntConstant()->IsFalse()) << rhs->AsIntConstant()->GetValue(); *trueBranch = ifInstruction->IfFalseSuccessor(); + } else { + // Sometimes we see a comparison of instance-of with a constant which is neither 0 nor 1. + // In those cases, we cannot do the match if+instance-of. + return false; } *instanceOf = lhs->AsInstanceOf(); return true; @@ -463,10 +418,13 @@ static bool MatchIfInstanceOf(HIf* ifInstruction, if (rhs->AsIntConstant()->IsFalse()) { // Case (1b) *trueBranch = ifInstruction->IfTrueSuccessor(); - } else { + } else if (rhs->AsIntConstant()->IsTrue()) { // Case (2b) - DCHECK(rhs->AsIntConstant()->IsTrue()) << rhs->AsIntConstant()->GetValue(); *trueBranch = ifInstruction->IfFalseSuccessor(); + } else { + // Sometimes we see a comparison of instance-of with a constant which is neither 0 nor 1. + // In those cases, we cannot do the match if+instance-of. + return false; } *instanceOf = lhs->AsInstanceOf(); return true; @@ -583,7 +541,7 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction* ScopedObjectAccess soa(Thread::Current()); ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_); ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType( - type_idx, dex_cache, class_loader_.Get()); + type_idx, dex_cache, dex_cache->GetClassLoader()); SetClassAsTypeInfo(instr, klass, is_exact); } diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 889a8465e0..655f62b3da 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -18,12 +18,13 @@ #define ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_ #include "base/arena_containers.h" +#include "base/macros.h" #include "mirror/class-inl.h" #include "nodes.h" #include "obj_ptr.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { /** * Propagates reference types to instructions. @@ -31,7 +32,6 @@ namespace art { class ReferenceTypePropagation : public HOptimization { public: ReferenceTypePropagation(HGraph* graph, - Handle<mirror::ClassLoader> class_loader, Handle<mirror::DexCache> hint_dex_cache, bool is_first_run, const char* name = kReferenceTypePropagationPassName); @@ -71,10 +71,6 @@ class ReferenceTypePropagation : public HOptimization { HandleCache* handle_cache) REQUIRES_SHARED(Locks::mutator_lock_); - void ValidateTypes(); - - Handle<mirror::ClassLoader> class_loader_; - // Note: hint_dex_cache_ is usually, but not necessarily, the dex cache associated with // graph_->GetDexFile(). Since we may look up also in other dex files, it's used only // as a hint, to reduce the number of calls to the costly ClassLinker::FindDexCache(). 
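MatchIfInstanceOf now tolerates comparisons of an instance-of result against a constant that is neither 0 nor 1 by declining the match instead of asserting. A toy version of that "bail out rather than DCHECK" shape (simplified, no HIR types):

    // Sketch: a matcher that fails gracefully on unexpected constants, so
    // unusual-but-legal input simply disables the optimization.
    #include <cassert>
    #include <optional>

    enum class Branch { kTrueSuccessor, kFalseSuccessor };

    // Matches `if (instance_of == constant)`: constant 1 selects the true
    // successor, constant 0 the false successor, anything else is "no match".
    std::optional<Branch> MatchEqAgainstConstant(int constant) {
      if (constant == 1) return Branch::kTrueSuccessor;
      if (constant == 0) return Branch::kFalseSuccessor;
      return std::nullopt;  // e.g. `instance_of == 2`: never true, do not match
    }

    int main() {
      assert(MatchEqAgainstConstant(1) == Branch::kTrueSuccessor);
      assert(MatchEqAgainstConstant(0) == Branch::kFalseSuccessor);
      assert(!MatchEqAgainstConstant(2).has_value());
    }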
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc index d1bcab083c..2b012fcd67 100644 --- a/compiler/optimizing/reference_type_propagation_test.cc +++ b/compiler/optimizing/reference_type_propagation_test.cc @@ -19,6 +19,7 @@ #include <random> #include "base/arena_allocator.h" +#include "base/macros.h" #include "base/transform_array_ref.h" #include "base/transform_iterator.h" #include "builder.h" @@ -26,7 +27,7 @@ #include "object_lock.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { // TODO It would be good to use the following but there is a miniscule amount of // chance for flakiness so we'll just use a set seed instead. @@ -47,11 +48,8 @@ class ReferenceTypePropagationTestBase : public SuperTest, public OptimizingUnit void SetupPropagation(VariableSizedHandleScope* handles) { graph_ = CreateGraph(handles); - propagation_ = new (GetAllocator()) ReferenceTypePropagation(graph_, - Handle<mirror::ClassLoader>(), - Handle<mirror::DexCache>(), - true, - "test_prop"); + propagation_ = new (GetAllocator()) + ReferenceTypePropagation(graph_, Handle<mirror::DexCache>(), true, "test_prop"); } // Relay method to merge type in reference type propagation. diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 875c633889..53e11f2c3d 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -21,7 +21,7 @@ #include "linear_order.h" #include "ssa_liveness_analysis.h" -namespace art { +namespace art HIDDEN { RegisterAllocationResolver::RegisterAllocationResolver(CodeGenerator* codegen, const SsaLivenessAnalysis& liveness) diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h index 278371777d..f4782eb48e 100644 --- a/compiler/optimizing/register_allocation_resolver.h +++ b/compiler/optimizing/register_allocation_resolver.h @@ -18,10 +18,11 @@ #define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_ #include "base/array_ref.h" +#include "base/macros.h" #include "base/value_object.h" #include "data_type.h" -namespace art { +namespace art HIDDEN { class ArenaAllocator; class CodeGenerator; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index a9c217fc4f..e4c2d74908 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -27,7 +27,7 @@ #include "register_allocator_linear_scan.h" #include "ssa_liveness_analysis.h" -namespace art { +namespace art HIDDEN { RegisterAllocator::RegisterAllocator(ScopedArenaAllocator* allocator, CodeGenerator* codegen, diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 4d226875bf..453e339cba 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -22,7 +22,7 @@ #include "base/arena_object.h" #include "base/macros.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class HBasicBlock; diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index 684aaf5750..a7c891d4e7 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -22,7 +22,7 @@ #include "ssa_liveness_analysis.h" #include "thread-current-inl.h" -namespace art { +namespace 
art HIDDEN { // Highest number of registers that we support for any platform. This can be used for std::bitset, // for example, which needs to know its size at compile time. diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h index e5b86eacee..0e10152049 100644 --- a/compiler/optimizing/register_allocator_graph_color.h +++ b/compiler/optimizing/register_allocator_graph_color.h @@ -24,7 +24,7 @@ #include "base/scoped_arena_containers.h" #include "register_allocator.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class HBasicBlock; diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index 833c24d5bb..fcdaa2d34f 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -26,7 +26,7 @@ #include "register_allocation_resolver.h" #include "ssa_liveness_analysis.h" -namespace art { +namespace art HIDDEN { static constexpr size_t kMaxLifetimePosition = -1; static constexpr size_t kDefaultNumberOfSpillSlots = 4; diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h index 9a1e0d7f10..c71a9e9ff1 100644 --- a/compiler/optimizing/register_allocator_linear_scan.h +++ b/compiler/optimizing/register_allocator_linear_scan.h @@ -22,7 +22,7 @@ #include "base/scoped_arena_containers.h" #include "register_allocator.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class HBasicBlock; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 682315545d..d316aa5dc2 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -18,6 +18,7 @@ #include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" @@ -31,17 +32,17 @@ #include "ssa_liveness_analysis.h" #include "ssa_phi_elimination.h" -namespace art { +namespace art HIDDEN { using Strategy = RegisterAllocator::Strategy; // Note: the register allocator tests rely on the fact that constants have live // intervals and registers get allocated to them. -class RegisterAllocatorTest : public OptimizingUnitTest { +class RegisterAllocatorTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void SetUp() override { - OptimizingUnitTest::SetUp(); + CommonCompilerTest::SetUp(); // This test is using the x86 ISA. compiler_options_ = CommonCompilerTest::CreateCompilerOptions(InstructionSet::kX86, "default"); } diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index 8f18ccff5f..116f52605e 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -32,7 +32,7 @@ #include "scheduler_arm.h" #endif -namespace art { +namespace art HIDDEN { void SchedulingGraph::AddDependency(SchedulingNode* node, SchedulingNode* dependency, @@ -718,9 +718,10 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { // HLoadException // HMemoryBarrier // HMonitorOperation - // HNativeDebugInfo + // HNop // HThrow // HTryBoundary + // All volatile field access e.g. HInstanceFieldGet // TODO: Some of the instructions above may be safe to schedule (maybe as // scheduling barriers). 
return instruction->IsArrayGet() || diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index f7180a02d7..299fbc93f3 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -19,6 +19,7 @@ #include <fstream> +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "base/stl_util.h" @@ -28,7 +29,7 @@ #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { // General description of instruction scheduling. // diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc index 965e1bd9f4..3f931c4c49 100644 --- a/compiler/optimizing/scheduler_arm.cc +++ b/compiler/optimizing/scheduler_arm.cc @@ -23,7 +23,7 @@ #include "mirror/array-inl.h" #include "mirror/string.h" -namespace art { +namespace art HIDDEN { namespace arm { using helpers::Int32ConstantFrom; @@ -669,7 +669,7 @@ void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) { } case DataType::Type::kReference: { - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency; } else { if (index->IsConstant()) { @@ -937,7 +937,7 @@ void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruct break; case DataType::Type::kReference: - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency; last_visited_latency_ = kArmMemoryLoadLatency; } else { diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h index d11222d9f4..0da21c187f 100644 --- a/compiler/optimizing/scheduler_arm.h +++ b/compiler/optimizing/scheduler_arm.h @@ -17,14 +17,12 @@ #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ +#include "base/macros.h" #include "code_generator_arm_vixl.h" #include "scheduler.h" -namespace art { +namespace art HIDDEN { namespace arm { -// TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere? -typedef CodeGeneratorARMVIXL CodeGeneratorARMType; - // AArch32 instruction latencies. // We currently assume that all ARM CPUs share the same instruction latency list. // The following latencies were tuned based on performance experiments and @@ -49,10 +47,10 @@ static constexpr uint32_t kArmNopLatency = 2; static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18; static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46; -class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { +class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor { public: explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen) - : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {} + : codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {} // Default visitor for instructions not handled specifically below. void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { @@ -133,7 +131,7 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { // The latency setting for each HInstruction depends on how CodeGenerator may generate code, // latency visitors may query CodeGenerator for such information for accurate latency settings. 
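The ARM latency visitor above now consults the runtime gUseReadBarrier flag when costing reference loads. A rough sketch of that kind of latency selection (the constants and names below are illustrative, not ART's tuned values):

    // Sketch: a latency model where the reference-load cost depends on the
    // runtime read-barrier configuration.
    #include <cstdint>
    #include <cstdio>

    constexpr uint32_t kMemoryLoadLatency = 9;
    constexpr uint32_t kLoadWithBakerReadBarrierLatency = 18;

    bool gUseReadBarrier = true;                 // runtime choice (depends on the GC)
    constexpr bool kUseBakerReadBarrier = true;  // build-time constant

    enum class ToyType { kInt32, kReference };

    uint32_t ArrayGetLatency(ToyType type) {
      if (type == ToyType::kReference && gUseReadBarrier && kUseBakerReadBarrier) {
        // Reference loads go through the Baker read-barrier fast path, which
        // the scheduler models as a longer latency.
        return kLoadWithBakerReadBarrierLatency;
      }
      return kMemoryLoadLatency;
    }

    int main() {
      std::printf("int load: %u cycles, ref load: %u cycles\n",
                  static_cast<unsigned>(ArrayGetLatency(ToyType::kInt32)),
                  static_cast<unsigned>(ArrayGetLatency(ToyType::kReference)));
    }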
- CodeGeneratorARMType* codegen_; + CodeGeneratorARMVIXL* codegen_; }; class HSchedulerARM : public HScheduler { diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc index 4f504c2100..3071afd951 100644 --- a/compiler/optimizing/scheduler_arm64.cc +++ b/compiler/optimizing/scheduler_arm64.cc @@ -20,7 +20,7 @@ #include "mirror/array-inl.h" #include "mirror/string.h" -namespace art { +namespace art HIDDEN { namespace arm64 { void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr) { diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index ba5a743545..ec41577e9d 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_ #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_ +#include "base/macros.h" #include "scheduler.h" -namespace art { +namespace art HIDDEN { namespace arm64 { static constexpr uint32_t kArm64MemoryLoadLatency = 5; @@ -55,7 +56,7 @@ static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60; static constexpr uint32_t kArm64SIMDDivFloatLatency = 30; static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10; -class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { +class SchedulingLatencyVisitorARM64 final : public SchedulingLatencyVisitor { public: // Default visitor for instructions not handled specifically below. void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index a1cc202a89..165bfe3d94 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -17,6 +17,7 @@ #include "scheduler.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "codegen_test_utils.h" #include "common_compiler_test.h" @@ -34,7 +35,7 @@ #include "scheduler_arm.h" #endif -namespace art { +namespace art HIDDEN { // Return all combinations of ISA and code generator that are executable on // hardware, or on simulator, and that we'd like to test. @@ -65,7 +66,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { return v; } -class SchedulerTest : public OptimizingUnitTest { +class SchedulerTest : public CommonCompilerTest, public OptimizingUnitTestHelper { public: SchedulerTest() : graph_(CreateGraph()) { } diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index 54053820ca..6a10440d11 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -16,10 +16,10 @@ #include "select_generator.h" -#include "base/scoped_arena_containers.h" +#include "optimizing/nodes.h" #include "reference_type_propagation.h" -namespace art { +namespace art HIDDEN { static constexpr size_t kMaxInstructionsInBranch = 1u; @@ -69,156 +69,277 @@ static bool BlocksMergeTogether(HBasicBlock* block1, HBasicBlock* block2) { return block1->GetSingleSuccessor() == block2->GetSingleSuccessor(); } -// Returns nullptr if `block` has either no phis or there is more than one phi -// with different inputs at `index1` and `index2`. Otherwise returns that phi. -static HPhi* GetSingleChangedPhi(HBasicBlock* block, size_t index1, size_t index2) { +// Returns nullptr if `block` has either no phis or there is more than one phi. Otherwise returns +// that phi. 
+static HPhi* GetSinglePhi(HBasicBlock* block, size_t index1, size_t index2) { DCHECK_NE(index1, index2); HPhi* select_phi = nullptr; for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); - if (phi->InputAt(index1) != phi->InputAt(index2)) { - if (select_phi == nullptr) { - // First phi with different inputs for the two indices found. - select_phi = phi; - } else { - // More than one phis has different inputs for the two indices. - return nullptr; - } + if (select_phi == nullptr) { + // First phi found. + select_phi = phi; + } else { + // More than one phi found, return null. + return nullptr; } } return select_phi; } -bool HSelectGenerator::Run() { - bool didSelect = false; - // Select cache with local allocator. - ScopedArenaAllocator allocator(graph_->GetArenaStack()); - ScopedArenaSafeMap<HInstruction*, HSelect*> cache( - std::less<HInstruction*>(), allocator.Adapter(kArenaAllocSelectGenerator)); +bool HSelectGenerator::TryGenerateSelectSimpleDiamondPattern( + HBasicBlock* block, ScopedArenaSafeMap<HInstruction*, HSelect*>* cache) { + DCHECK(block->GetLastInstruction()->IsIf()); + HIf* if_instruction = block->GetLastInstruction()->AsIf(); + HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); + HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); + DCHECK_NE(true_block, false_block); - // Iterate in post order in the unlikely case that removing one occurrence of - // the selection pattern empties a branch block of another occurrence. - for (HBasicBlock* block : graph_->GetPostOrder()) { - if (!block->EndsWithIf()) continue; + if (!IsSimpleBlock(true_block) || + !IsSimpleBlock(false_block) || + !BlocksMergeTogether(true_block, false_block)) { + return false; + } + HBasicBlock* merge_block = true_block->GetSingleSuccessor(); - // Find elements of the diamond pattern. - HIf* if_instruction = block->GetLastInstruction()->AsIf(); - HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); - HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); - DCHECK_NE(true_block, false_block); + // If the branches are not empty, move instructions in front of the If. + // TODO(dbrazdil): This puts an instruction between If and its condition. + // Implement moving of conditions to first users if possible. + while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { + HInstruction* instr = true_block->GetFirstInstruction(); + DCHECK(!instr->CanThrow()); + instr->MoveBefore(if_instruction); + } + while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { + HInstruction* instr = false_block->GetFirstInstruction(); + DCHECK(!instr->CanThrow()); + instr->MoveBefore(if_instruction); + } + DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn()); + DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn()); - if (!IsSimpleBlock(true_block) || - !IsSimpleBlock(false_block) || - !BlocksMergeTogether(true_block, false_block)) { - continue; - } - HBasicBlock* merge_block = true_block->GetSingleSuccessor(); - - // If the branches are not empty, move instructions in front of the If. - // TODO(dbrazdil): This puts an instruction between If and its condition. - // Implement moving of conditions to first users if possible. 
- while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { - HInstruction* instr = true_block->GetFirstInstruction(); - DCHECK(!instr->CanThrow()); - instr->MoveBefore(if_instruction); - } - while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { - HInstruction* instr = false_block->GetFirstInstruction(); - DCHECK(!instr->CanThrow()); - instr->MoveBefore(if_instruction); - } - DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn()); - DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn()); - - // Find the resulting true/false values. - size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block); - size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block); - DCHECK_NE(predecessor_index_true, predecessor_index_false); - - bool both_successors_return = true_block->IsSingleReturn() && false_block->IsSingleReturn(); - HPhi* phi = GetSingleChangedPhi(merge_block, predecessor_index_true, predecessor_index_false); - - HInstruction* true_value = nullptr; - HInstruction* false_value = nullptr; - if (both_successors_return) { - true_value = true_block->GetFirstInstruction()->InputAt(0); - false_value = false_block->GetFirstInstruction()->InputAt(0); - } else if (phi != nullptr) { - true_value = phi->InputAt(predecessor_index_true); - false_value = phi->InputAt(predecessor_index_false); - } else { - continue; - } - DCHECK(both_successors_return || phi != nullptr); - - // Create the Select instruction and insert it in front of the If. - HInstruction* condition = if_instruction->InputAt(0); - HSelect* select = new (graph_->GetAllocator()) HSelect(condition, - true_value, - false_value, - if_instruction->GetDexPc()); - if (both_successors_return) { - if (true_value->GetType() == DataType::Type::kReference) { - DCHECK(false_value->GetType() == DataType::Type::kReference); - ReferenceTypePropagation::FixUpInstructionType(select, graph_->GetHandleCache()); - } - } else if (phi->GetType() == DataType::Type::kReference) { - select->SetReferenceTypeInfo(phi->GetReferenceTypeInfo()); - } - block->InsertInstructionBefore(select, if_instruction); + // Find the resulting true/false values. + size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block); + size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block); + DCHECK_NE(predecessor_index_true, predecessor_index_false); - // Remove the true branch which removes the corresponding Phi - // input if needed. If left only with the false branch, the Phi is - // automatically removed. - if (both_successors_return) { - false_block->GetFirstInstruction()->ReplaceInput(select, 0); - } else { - phi->ReplaceInput(select, predecessor_index_false); + bool both_successors_return = true_block->IsSingleReturn() && false_block->IsSingleReturn(); + // TODO(solanes): Extend to support multiple phis? e.g. 
+ // int a, b; + // if (bool) { + // a = 0; b = 1; + // } else { + // a = 1; b = 2; + // } + // // use a and b + HPhi* phi = GetSinglePhi(merge_block, predecessor_index_true, predecessor_index_false); + + HInstruction* true_value = nullptr; + HInstruction* false_value = nullptr; + if (both_successors_return) { + true_value = true_block->GetFirstInstruction()->InputAt(0); + false_value = false_block->GetFirstInstruction()->InputAt(0); + } else if (phi != nullptr) { + true_value = phi->InputAt(predecessor_index_true); + false_value = phi->InputAt(predecessor_index_false); + } else { + return false; + } + DCHECK(both_successors_return || phi != nullptr); + + // Create the Select instruction and insert it in front of the If. + HInstruction* condition = if_instruction->InputAt(0); + HSelect* select = new (graph_->GetAllocator()) HSelect(condition, + true_value, + false_value, + if_instruction->GetDexPc()); + if (both_successors_return) { + if (true_value->GetType() == DataType::Type::kReference) { + DCHECK(false_value->GetType() == DataType::Type::kReference); + ReferenceTypePropagation::FixUpInstructionType(select, graph_->GetHandleCache()); } + } else if (phi->GetType() == DataType::Type::kReference) { + select->SetReferenceTypeInfoIfValid(phi->GetReferenceTypeInfo()); + } + block->InsertInstructionBefore(select, if_instruction); - bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u); - true_block->DisconnectAndDelete(); + // Remove the true branch which removes the corresponding Phi + // input if needed. If left only with the false branch, the Phi is + // automatically removed. + if (both_successors_return) { + false_block->GetFirstInstruction()->ReplaceInput(select, 0); + } else { + phi->ReplaceInput(select, predecessor_index_false); + } + + bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u); + true_block->DisconnectAndDelete(); + + // Merge remaining blocks which are now connected with Goto. + DCHECK_EQ(block->GetSingleSuccessor(), false_block); + block->MergeWith(false_block); + if (!both_successors_return && only_two_predecessors) { + DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr); + DCHECK_EQ(block->GetSingleSuccessor(), merge_block); + block->MergeWith(merge_block); + } - // Merge remaining blocks which are now connected with Goto. - DCHECK_EQ(block->GetSingleSuccessor(), false_block); - block->MergeWith(false_block); - if (!both_successors_return && only_two_predecessors) { - DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr); - DCHECK_EQ(block->GetSingleSuccessor(), merge_block); - block->MergeWith(merge_block); + MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated); + + // Very simple way of finding common subexpressions in the generated HSelect statements + // (since this runs after GVN). Lookup by condition, and reuse latest one if possible + // (due to post order, latest select is most likely replacement). If needed, we could + // improve this by e.g. using the operands in the map as well. + auto it = cache->find(condition); + if (it == cache->end()) { + cache->Put(condition, select); + } else { + // Found cached value. See if latest can replace cached in the HIR. 
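The transformation being built here replaces a two-armed diamond, whose merge phi picks between two values, with a single HSelect. At the source level the rewrite is just the conversion from an if/else assignment to a conditional expression; the sketch below shows the two shapes (ordinary C++, not the HIR):

    // Before: the "simple diamond" shape the pass matches. The merge point
    // effectively contains a phi choosing between the two assignments.
    int DiamondShape(bool condition, int a, int b) {
      int result;
      if (condition) {
        result = a;   // true block: a single, non-throwing instruction
      } else {
        result = b;   // false block: likewise
      }
      return result;  // merge block: result is phi(a, b)
    }

    // After: the phi is replaced with a select (the ternary operator here),
    // which backends can lower to a branch-free conditional move.
    int SelectShape(bool condition, int a, int b) {
      return condition ? a : b;
    }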
+ HSelect* cached_select = it->second; + DCHECK_EQ(cached_select->GetCondition(), select->GetCondition()); + if (cached_select->GetTrueValue() == select->GetTrueValue() && + cached_select->GetFalseValue() == select->GetFalseValue() && + select->StrictlyDominates(cached_select)) { + cached_select->ReplaceWith(select); + cached_select->GetBlock()->RemoveInstruction(cached_select); } + it->second = select; // always cache latest + } + + // No need to update dominance information, as we are simplifying + // a simple diamond shape, where the join block is merged with the + // entry block. Any following blocks would have had the join block + // as a dominator, and `MergeWith` handles changing that to the + // entry block + return true; +} - MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated); +HBasicBlock* HSelectGenerator::TryFixupDoubleDiamondPattern(HBasicBlock* block) { + DCHECK(block->GetLastInstruction()->IsIf()); + HIf* if_instruction = block->GetLastInstruction()->AsIf(); + HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); + HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); + DCHECK_NE(true_block, false_block); - // Very simple way of finding common subexpressions in the generated HSelect statements - // (since this runs after GVN). Lookup by condition, and reuse latest one if possible - // (due to post order, latest select is most likely replacement). If needed, we could - // improve this by e.g. using the operands in the map as well. - auto it = cache.find(condition); - if (it == cache.end()) { - cache.Put(condition, select); + // One branch must be a single goto, and the other one the inner if. + if (true_block->IsSingleGoto() == false_block->IsSingleGoto()) { + return nullptr; + } + + HBasicBlock* single_goto = true_block->IsSingleGoto() ? true_block : false_block; + HBasicBlock* inner_if_block = true_block->IsSingleGoto() ? false_block : true_block; + + // The innner if branch has to be a block with just a comparison and an if. + if (!inner_if_block->EndsWithIf() || + inner_if_block->GetLastInstruction()->AsIf()->InputAt(0) != + inner_if_block->GetFirstInstruction() || + inner_if_block->GetLastInstruction()->GetPrevious() != + inner_if_block->GetFirstInstruction() || + !inner_if_block->GetFirstInstruction()->IsCondition()) { + return nullptr; + } + + HIf* inner_if_instruction = inner_if_block->GetLastInstruction()->AsIf(); + HBasicBlock* inner_if_true_block = inner_if_instruction->IfTrueSuccessor(); + HBasicBlock* inner_if_false_block = inner_if_instruction->IfFalseSuccessor(); + if (!inner_if_true_block->IsSingleGoto() || !inner_if_false_block->IsSingleGoto()) { + return nullptr; + } + + // One must merge into the outer condition and the other must not. + if (BlocksMergeTogether(single_goto, inner_if_true_block) == + BlocksMergeTogether(single_goto, inner_if_false_block)) { + return nullptr; + } + + // First merge merges the outer if with one of the inner if branches. The block must be a Phi and + // a Goto. + HBasicBlock* first_merge = single_goto->GetSingleSuccessor(); + if (first_merge->GetNumberOfPredecessors() != 2 || + first_merge->GetPhis().CountSize() != 1 || + !first_merge->GetLastInstruction()->IsGoto() || + first_merge->GetFirstInstruction() != first_merge->GetLastInstruction()) { + return nullptr; + } + + HPhi* first_phi = first_merge->GetFirstPhi()->AsPhi(); + + // Second merge is first_merge and the remainder branch merging. It must be phi + goto, or phi + + // return. Depending on the first merge, we define the second merge. 
+ HBasicBlock* merges_into_second_merge = + BlocksMergeTogether(single_goto, inner_if_true_block) + ? inner_if_false_block + : inner_if_true_block; + if (!BlocksMergeTogether(first_merge, merges_into_second_merge)) { + return nullptr; + } + + HBasicBlock* second_merge = merges_into_second_merge->GetSingleSuccessor(); + if (second_merge->GetNumberOfPredecessors() != 2 || + second_merge->GetPhis().CountSize() != 1 || + !(second_merge->GetLastInstruction()->IsGoto() || + second_merge->GetLastInstruction()->IsReturn()) || + second_merge->GetFirstInstruction() != second_merge->GetLastInstruction()) { + return nullptr; + } + + size_t index = second_merge->GetPredecessorIndexOf(merges_into_second_merge); + HPhi* second_phi = second_merge->GetFirstPhi()->AsPhi(); + + // Merge the phis. + first_phi->AddInput(second_phi->InputAt(index)); + merges_into_second_merge->ReplaceSuccessor(second_merge, first_merge); + second_phi->ReplaceWith(first_phi); + second_merge->RemovePhi(second_phi); + + // Sort out the new domination before merging the blocks + DCHECK_EQ(second_merge->GetSinglePredecessor(), first_merge); + second_merge->GetDominator()->RemoveDominatedBlock(second_merge); + second_merge->SetDominator(first_merge); + first_merge->AddDominatedBlock(second_merge); + first_merge->MergeWith(second_merge); + + // No need to update dominance information. There's a chance that `merges_into_second_merge` + // doesn't come before `first_merge` but we don't need to fix it since `merges_into_second_merge` + // will disappear from the graph altogether when doing the follow-up + // TryGenerateSelectSimpleDiamondPattern. + + return inner_if_block; +} + +bool HSelectGenerator::Run() { + bool did_select = false; + // Select cache with local allocator. + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + ScopedArenaSafeMap<HInstruction*, HSelect*> cache(std::less<HInstruction*>(), + allocator.Adapter(kArenaAllocSelectGenerator)); + + // Iterate in post order in the unlikely case that removing one occurrence of + // the selection pattern empties a branch block of another occurrence. + for (HBasicBlock* block : graph_->GetPostOrder()) { + if (!block->EndsWithIf()) { + continue; + } + + if (TryGenerateSelectSimpleDiamondPattern(block, &cache)) { + did_select = true; } else { - // Found cached value. See if latest can replace cached in the HIR. - HSelect* cached = it->second; - DCHECK_EQ(cached->GetCondition(), select->GetCondition()); - if (cached->GetTrueValue() == select->GetTrueValue() && - cached->GetFalseValue() == select->GetFalseValue() && - select->StrictlyDominates(cached)) { - cached->ReplaceWith(select); - cached->GetBlock()->RemoveInstruction(cached); + // Try to fix up the odd version of the double diamond pattern. If we could do it, it means + // that we can generate two selects. + HBasicBlock* inner_if_block = TryFixupDoubleDiamondPattern(block); + if (inner_if_block != nullptr) { + // Generate the selects now since `inner_if_block` should be after `block` in PostOrder. + bool result = TryGenerateSelectSimpleDiamondPattern(inner_if_block, &cache); + DCHECK(result); + result = TryGenerateSelectSimpleDiamondPattern(block, &cache); + DCHECK(result); + did_select = true; } - it->second = select; // always cache latest } - - // No need to update dominance information, as we are simplifying - // a simple diamond shape, where the join block is merged with the - // entry block. 
Any following blocks would have had the join block - // as a dominator, and `MergeWith` handles changing that to the - // entry block. - didSelect = true; } - return didSelect; + + return did_select; } } // namespace art diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h index 30ac8a86eb..7aa0803d89 100644 --- a/compiler/optimizing/select_generator.h +++ b/compiler/optimizing/select_generator.h @@ -57,9 +57,12 @@ #ifndef ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_ #define ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_ +#include "base/macros.h" +#include "base/scoped_arena_containers.h" #include "optimization.h" +#include "optimizing/nodes.h" -namespace art { +namespace art HIDDEN { class HSelectGenerator : public HOptimization { public: @@ -72,6 +75,43 @@ class HSelectGenerator : public HOptimization { static constexpr const char* kSelectGeneratorPassName = "select_generator"; private: + bool TryGenerateSelectSimpleDiamondPattern(HBasicBlock* block, + ScopedArenaSafeMap<HInstruction*, HSelect*>* cache); + + // When generating code for nested ternary operators (e.g. `return (x > 100) ? 100 : ((x < -100) ? + // -100 : x);`), a dexer can generate a double diamond pattern but it is not a clear cut one due + // to the merging of the blocks. `TryFixupDoubleDiamondPattern` recognizes that pattern and fixes + // up the graph to have a clean double diamond that `TryGenerateSelectSimpleDiamondPattern` can + // use to generate selects. + // + // In ASCII, it turns: + // + // 1 (outer if) + // / \ + // 2 3 (inner if) + // | / \ + // | 4 5 + // \/ | + // 6 | + // \ | + // 7 + // | + // 8 + // into: + // 1 (outer if) + // / \ + // 2 3 (inner if) + // | / \ + // | 4 5 + // \/ / + // 6 + // | + // 8 + // + // In short, block 7 disappears and we merge 6 and 7. Now we have a diamond with {3,4,5,6}, and + // when that gets resolved we get another one with the outer if. + HBasicBlock* TryFixupDoubleDiamondPattern(HBasicBlock* block); + DISALLOW_COPY_AND_ASSIGN(HSelectGenerator); }; diff --git a/compiler/optimizing/select_generator_test.cc b/compiler/optimizing/select_generator_test.cc index b18d41abbb..fc9e150d92 100644 --- a/compiler/optimizing/select_generator_test.cc +++ b/compiler/optimizing/select_generator_test.cc @@ -17,12 +17,13 @@ #include "select_generator.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "side_effects_analysis.h" -namespace art { +namespace art HIDDEN { class SelectGeneratorTest : public OptimizingUnitTest { protected: diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 17cf3d3477..277edff33e 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -34,7 +34,7 @@ #include "runtime.h" #include "scoped_thread_state_change-inl.h" -namespace art { +namespace art HIDDEN { static bool IsInBootImage(ArtMethod* method) { gc::Heap* heap = Runtime::Current()->GetHeap(); @@ -63,9 +63,9 @@ HInvokeStaticOrDirect::DispatchInfo HSharpening::SharpenLoadMethod( bool for_interface_call, CodeGenerator* codegen) { if (kIsDebugBuild) { - ScopedObjectAccess soa(Thread::Current()); // Required for GetDeclaringClass below. + ScopedObjectAccess soa(Thread::Current()); // Required for `IsStringConstructor()` below. 
DCHECK(callee != nullptr); - DCHECK(!(callee->IsConstructor() && callee->GetDeclaringClass()->IsStringClass())); + DCHECK(!callee->IsStringConstructor()); } MethodLoadKind method_load_kind; diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index 975366918c..6dfe904f27 100644 --- a/compiler/optimizing/sharpening.h +++ b/compiler/optimizing/sharpening.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_SHARPENING_H_ #define ART_COMPILER_OPTIMIZING_SHARPENING_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class DexCompilationUnit; diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc index ba97b43de9..56719b100e 100644 --- a/compiler/optimizing/side_effects_analysis.cc +++ b/compiler/optimizing/side_effects_analysis.cc @@ -16,7 +16,7 @@ #include "side_effects_analysis.h" -namespace art { +namespace art HIDDEN { bool SideEffectsAnalysis::Run() { // Inlining might have created more blocks, so we need to increase the size diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h index 56a01e63f1..47fcdc5d1b 100644 --- a/compiler/optimizing/side_effects_analysis.h +++ b/compiler/optimizing/side_effects_analysis.h @@ -18,10 +18,11 @@ #define ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_ #include "base/arena_containers.h" +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class SideEffectsAnalysis : public HOptimization { public: diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc index 268798ca7d..f2b781dfa4 100644 --- a/compiler/optimizing/side_effects_test.cc +++ b/compiler/optimizing/side_effects_test.cc @@ -16,10 +16,11 @@ #include <gtest/gtest.h> +#include "base/macros.h" #include "data_type.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { // Only runtime types other than void are allowed. static const DataType::Type kTestTypes[] = { diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 67ee83c9dd..a658252e69 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -27,7 +27,7 @@ #include "scoped_thread_state_change-inl.h" #include "ssa_phi_elimination.h" -namespace art { +namespace art HIDDEN { void SsaBuilder::FixNullConstantType() { // The order doesn't matter here. @@ -538,7 +538,6 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { // Compute type of reference type instructions. The pass assumes that // NullConstant has been fixed up. ReferenceTypePropagation(graph_, - class_loader_, dex_cache_, /* is_first_run= */ true).Run(); diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index a7d4e0ebd3..99a5469932 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -17,12 +17,13 @@ #ifndef ART_COMPILER_OPTIMIZING_SSA_BUILDER_H_ #define ART_COMPILER_OPTIMIZING_SSA_BUILDER_H_ +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { /** * Transforms a graph into SSA form. 
The liveness guarantees of diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 18942a1823..317e0999d7 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -21,7 +21,7 @@ #include "linear_order.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { void SsaLivenessAnalysis::Analyze() { // Compute the linear order directly in the graph's data structure diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 7f31585f34..cc2b49cf22 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -21,11 +21,12 @@ #include "base/intrusive_forward_list.h" #include "base/iteration_range.h" +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class SsaLivenessAnalysis; diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc index a477893d57..2df0f34c7d 100644 --- a/compiler/optimizing/ssa_liveness_analysis_test.cc +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -20,12 +20,13 @@ #include "arch/instruction_set_features.h" #include "base/arena_allocator.h" #include "base/arena_containers.h" +#include "base/macros.h" #include "code_generator.h" #include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { class SsaLivenessAnalysisTest : public OptimizingUnitTest { protected: diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index 8fd6962500..ce343dffec 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -21,7 +21,7 @@ #include "base/scoped_arena_containers.h" #include "base/bit_vector-inl.h" -namespace art { +namespace art HIDDEN { bool SsaDeadPhiElimination::Run() { MarkDeadPhis(); diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h index c5cc752ffc..f606f928fa 100644 --- a/compiler/optimizing/ssa_phi_elimination.h +++ b/compiler/optimizing/ssa_phi_elimination.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_ #define ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { /** * Optimization phase that removes dead phis from the graph. 
Dead phis are unused diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index e679893af2..980493db34 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -17,6 +17,7 @@ #include "android-base/stringprintf.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" @@ -27,9 +28,9 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { -class SsaTest : public OptimizingUnitTest { +class SsaTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void TestCode(const std::vector<uint16_t>& data, const char* expected); }; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index f55bbee1c8..1a368ed347 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -20,6 +20,7 @@ #include <vector> #include "art_method-inl.h" +#include "base/globals.h" #include "base/stl_util.h" #include "class_linker.h" #include "dex/dex_file.h" @@ -32,7 +33,7 @@ #include "scoped_thread_state_change-inl.h" #include "stack_map.h" -namespace art { +namespace art HIDDEN { constexpr static bool kVerifyStackMaps = kIsDebugBuild; @@ -49,7 +50,8 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes, size_t core_spill_mask, size_t fp_spill_mask, uint32_t num_dex_registers, - bool baseline) { + bool baseline, + bool debuggable) { DCHECK(!in_method_) << "Mismatched Begin/End calls"; in_method_ = true; DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called"; @@ -60,6 +62,7 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes, fp_spill_mask_ = fp_spill_mask; num_dex_registers_ = num_dex_registers; baseline_ = baseline; + debuggable_ = debuggable; if (kVerifyStackMaps) { dchecks_.emplace_back([=](const CodeInfo& code_info) { @@ -99,16 +102,21 @@ void StackMapStream::EndMethod(size_t code_size) { } } -void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, - uint32_t native_pc_offset, - uint32_t register_mask, - BitVector* stack_mask, - StackMap::Kind kind, - bool needs_vreg_info) { +void StackMapStream::BeginStackMapEntry( + uint32_t dex_pc, + uint32_t native_pc_offset, + uint32_t register_mask, + BitVector* stack_mask, + StackMap::Kind kind, + bool needs_vreg_info, + const std::vector<uint32_t>& dex_pc_list_for_catch_verification) { DCHECK(in_method_) << "Call BeginMethod first"; DCHECK(!in_stack_map_) << "Mismatched Begin/End calls"; in_stack_map_ = true; + DCHECK_IMPLIES(!dex_pc_list_for_catch_verification.empty(), kind == StackMap::Kind::Catch); + DCHECK_IMPLIES(!dex_pc_list_for_catch_verification.empty(), kIsDebugBuild); + current_stack_map_ = BitTableBuilder<StackMap>::Entry(); current_stack_map_[StackMap::kKind] = static_cast<uint32_t>(kind); current_stack_map_[StackMap::kPackedNativePc] = @@ -149,7 +157,8 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, instruction_set_); CHECK_EQ(stack_map.Row(), stack_map_index); } else if (kind == StackMap::Kind::Catch) { - StackMap stack_map = code_info.GetCatchStackMapForDexPc(dex_pc); + StackMap stack_map = code_info.GetCatchStackMapForDexPc( + ArrayRef<const uint32_t>(dex_pc_list_for_catch_verification)); CHECK_EQ(stack_map.Row(), stack_map_index); } StackMap stack_map = code_info.GetStackMapAt(stack_map_index); @@ -367,6 +376,7 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() { uint32_t flags = (inline_infos_.size() > 0) ? 
CodeInfo::kHasInlineInfo : 0; flags |= baseline_ ? CodeInfo::kIsBaseline : 0; + flags |= debuggable_ ? CodeInfo::kIsDebuggable : 0; DCHECK_LE(flags, kVarintMax); // Ensure flags can be read directly as byte. uint32_t bit_table_flags = 0; ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) { diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 27145a174c..643af2da94 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -21,6 +21,7 @@ #include "base/arena_bit_vector.h" #include "base/bit_table.h" #include "base/bit_vector-inl.h" +#include "base/macros.h" #include "base/memory_region.h" #include "base/scoped_arena_containers.h" #include "base/value_object.h" @@ -28,7 +29,7 @@ #include "nodes.h" #include "stack_map.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; @@ -64,15 +65,19 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { size_t core_spill_mask, size_t fp_spill_mask, uint32_t num_dex_registers, - bool baseline = false); + bool baseline, + bool debuggable); void EndMethod(size_t code_size); - void BeginStackMapEntry(uint32_t dex_pc, - uint32_t native_pc_offset, - uint32_t register_mask = 0, - BitVector* sp_mask = nullptr, - StackMap::Kind kind = StackMap::Kind::Default, - bool needs_vreg_info = true); + void BeginStackMapEntry( + uint32_t dex_pc, + uint32_t native_pc_offset, + uint32_t register_mask = 0, + BitVector* sp_mask = nullptr, + StackMap::Kind kind = StackMap::Kind::Default, + bool needs_vreg_info = true, + const std::vector<uint32_t>& dex_pc_list_for_catch_verification = std::vector<uint32_t>()); + void EndStackMapEntry(); void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) { @@ -125,6 +130,7 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { uint32_t fp_spill_mask_ = 0; uint32_t num_dex_registers_ = 0; bool baseline_; + bool debuggable_; BitTableBuilder<StackMap> stack_maps_; BitTableBuilder<RegisterMask> register_masks_; BitmapTableBuilder stack_masks_; diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index f6a739e15a..a2c30e7681 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -18,12 +18,13 @@ #include "art_method.h" #include "base/arena_bit_vector.h" +#include "base/macros.h" #include "base/malloc_arena_pool.h" #include "stack_map_stream.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { // Check that the stack mask of given stack map is identical // to the given bit vector. Returns true if they are same. 
@@ -52,7 +53,12 @@ TEST(StackMapTest, Test1) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 2); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 2, + /* baseline= */ false, + /* debuggable= */ false); ArenaBitVector sp_mask(&allocator, 0, false); size_t number_of_dex_registers = 2; @@ -106,7 +112,12 @@ TEST(StackMapTest, Test2) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 2); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 2, + /* baseline= */ false, + /* debuggable= */ false); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -300,7 +311,12 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 2); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 2, + /* baseline= */ false, + /* debuggable= */ false); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -363,7 +379,12 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 2); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 2, + /* baseline= */ false, + /* debuggable= */ false); ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 2; @@ -411,7 +432,12 @@ TEST(StackMapTest, TestShareDexRegisterMap) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 2); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 2, + /* baseline= */ false, + /* debuggable= */ false); ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 2; @@ -467,7 +493,12 @@ TEST(StackMapTest, TestNoDexRegisterMap) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 1); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 1, + /* baseline= */ false, + /* debuggable= */ false); ArenaBitVector sp_mask(&allocator, 0, false); stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask); @@ -512,7 +543,12 @@ TEST(StackMapTest, InlineTest) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 2); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 2, + /* baseline= */ false, + /* debuggable= */ false); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -702,7 +738,12 @@ TEST(StackMapTest, TestDeduplicateStackMask) { ArenaStack arena_stack(&pool); ScopedArenaAllocator 
allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 0); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 0, + /* baseline= */ false, + /* debuggable= */ false); ArenaBitVector sp_mask(&allocator, 0, true); sp_mask.SetBit(1); diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc index a5f919c31c..7c0097c6f6 100644 --- a/compiler/optimizing/superblock_cloner.cc +++ b/compiler/optimizing/superblock_cloner.cc @@ -22,7 +22,7 @@ #include <sstream> -namespace art { +namespace art HIDDEN { using HBasicBlockMap = SuperblockCloner::HBasicBlockMap; using HInstructionMap = SuperblockCloner::HInstructionMap; @@ -633,7 +633,7 @@ void SuperblockCloner::ConstructSubgraphClosedSSA() { HPhi* phi = new (arena_) HPhi(arena_, kNoRegNumber, 0, value->GetType()); if (value->GetType() == DataType::Type::kReference) { - phi->SetReferenceTypeInfo(value->GetReferenceTypeInfo()); + phi->SetReferenceTypeInfoIfValid(value->GetReferenceTypeInfo()); } exit_block->AddPhi(phi); diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h index 1f6ee74fbd..421701fb19 100644 --- a/compiler/optimizing/superblock_cloner.h +++ b/compiler/optimizing/superblock_cloner.h @@ -20,9 +20,10 @@ #include "base/arena_bit_vector.h" #include "base/arena_containers.h" #include "base/bit_vector-inl.h" +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class InductionVarRange; diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc index d8d68b7763..ea2563ea7d 100644 --- a/compiler/optimizing/superblock_cloner_test.cc +++ b/compiler/optimizing/superblock_cloner_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/macros.h" #include "graph_checker.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -21,7 +22,7 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { using HBasicBlockMap = SuperblockCloner::HBasicBlockMap; using HInstructionMap = SuperblockCloner::HInstructionMap; diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc index 33823e2a11..76e7e0c32c 100644 --- a/compiler/optimizing/suspend_check_test.cc +++ b/compiler/optimizing/suspend_check_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/macros.h" #include "builder.h" #include "dex/dex_instruction.h" #include "nodes.h" @@ -22,13 +23,13 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { /** * Check that the HGraphBuilder adds suspend checks to backward branches. */ -class SuspendCheckTest : public OptimizingUnitTest { +class SuspendCheckTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void TestCode(const std::vector<uint16_t>& data); }; diff --git a/compiler/optimizing/write_barrier_elimination.cc b/compiler/optimizing/write_barrier_elimination.cc new file mode 100644 index 0000000000..eb70b670fe --- /dev/null +++ b/compiler/optimizing/write_barrier_elimination.cc @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "write_barrier_elimination.h" + +#include "base/arena_allocator.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" +#include "optimizing/nodes.h" + +namespace art HIDDEN { + +class WBEVisitor final : public HGraphVisitor { + public: + WBEVisitor(HGraph* graph, OptimizingCompilerStats* stats) + : HGraphVisitor(graph), + scoped_allocator_(graph->GetArenaStack()), + current_write_barriers_(scoped_allocator_.Adapter(kArenaAllocWBE)), + stats_(stats) {} + + void VisitBasicBlock(HBasicBlock* block) override { + // We clear the map to perform this optimization only in the same block. Doing it across blocks + // would entail non-trivial merging of states. + current_write_barriers_.clear(); + HGraphVisitor::VisitBasicBlock(block); + } + + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override { + DCHECK(!instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())); + + if (instruction->GetFieldType() != DataType::Type::kReference || + instruction->GetValue()->IsNullConstant()) { + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + return; + } + + MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier); + HInstruction* obj = HuntForOriginalReference(instruction->InputAt(0)); + auto it = current_write_barriers_.find(obj); + if (it != current_write_barriers_.end()) { + DCHECK(it->second->IsInstanceFieldSet()); + DCHECK(it->second->AsInstanceFieldSet()->GetWriteBarrierKind() != + WriteBarrierKind::kDontEmit); + DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock()); + it->second->AsInstanceFieldSet()->SetWriteBarrierKind(WriteBarrierKind::kEmitNoNullCheck); + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier); + } else { + const bool inserted = current_write_barriers_.insert({obj, instruction}).second; + DCHECK(inserted); + DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit); + } + } + + void VisitStaticFieldSet(HStaticFieldSet* instruction) override { + DCHECK(!instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())); + + if (instruction->GetFieldType() != DataType::Type::kReference || + instruction->GetValue()->IsNullConstant()) { + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + return; + } + + MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier); + HInstruction* cls = HuntForOriginalReference(instruction->InputAt(0)); + auto it = current_write_barriers_.find(cls); + if (it != current_write_barriers_.end()) { + DCHECK(it->second->IsStaticFieldSet()); + DCHECK(it->second->AsStaticFieldSet()->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit); + DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock()); + it->second->AsStaticFieldSet()->SetWriteBarrierKind(WriteBarrierKind::kEmitNoNullCheck); + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier); + } else { + const bool inserted = current_write_barriers_.insert({cls, instruction}).second; + 
DCHECK(inserted); + DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit); + } + } + + void VisitArraySet(HArraySet* instruction) override { + if (instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) { + ClearCurrentValues(); + } + + if (instruction->GetComponentType() != DataType::Type::kReference || + instruction->GetValue()->IsNullConstant()) { + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + return; + } + + HInstruction* arr = HuntForOriginalReference(instruction->InputAt(0)); + MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier); + auto it = current_write_barriers_.find(arr); + if (it != current_write_barriers_.end()) { + DCHECK(it->second->IsArraySet()); + DCHECK(it->second->AsArraySet()->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit); + DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock()); + // We never skip the null check in ArraySets so that value is already set. + DCHECK(it->second->AsArraySet()->GetWriteBarrierKind() == WriteBarrierKind::kEmitNoNullCheck); + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier); + } else { + const bool inserted = current_write_barriers_.insert({arr, instruction}).second; + DCHECK(inserted); + DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit); + } + } + + void VisitInstruction(HInstruction* instruction) override { + if (instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) { + ClearCurrentValues(); + } + } + + private: + void ClearCurrentValues() { current_write_barriers_.clear(); } + + HInstruction* HuntForOriginalReference(HInstruction* ref) const { + // An original reference can be transformed by instructions like: + // i0 NewArray + // i1 HInstruction(i0) <-- NullCheck, BoundType, IntermediateAddress. + // i2 ArraySet(i1, index, value) + DCHECK(ref != nullptr); + while (ref->IsNullCheck() || ref->IsBoundType() || ref->IsIntermediateAddress()) { + ref = ref->InputAt(0); + } + return ref; + } + + ScopedArenaAllocator scoped_allocator_; + + // Stores a map of <Receiver, InstructionWhereTheWriteBarrierIs>. + // `InstructionWhereTheWriteBarrierIs` is used for DCHECKs only. + ScopedArenaHashMap<HInstruction*, HInstruction*> current_write_barriers_; + + OptimizingCompilerStats* const stats_; + + DISALLOW_COPY_AND_ASSIGN(WBEVisitor); +}; + +bool WriteBarrierElimination::Run() { + WBEVisitor wbe_visitor(graph_, stats_); + wbe_visitor.VisitReversePostOrder(); + return true; +} + +} // namespace art diff --git a/compiler/optimizing/write_barrier_elimination.h b/compiler/optimizing/write_barrier_elimination.h new file mode 100644 index 0000000000..a3769e7421 --- /dev/null +++ b/compiler/optimizing/write_barrier_elimination.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_
+#define ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_
+
+#include "base/macros.h"
+#include "optimization.h"
+
+namespace art HIDDEN {
+
+// Eliminates unnecessary write barriers from InstanceFieldSet, StaticFieldSet, and ArraySet.
+//
+// We can eliminate redundant write barriers as we don't need several for the same receiver. For
+// example:
+// MyObject o;
+// o.inner_obj = io;
+// o.inner_obj2 = io2;
+// o.inner_obj3 = io3;
+// We can keep the write barrier for `inner_obj` and remove the other two.
+//
+// In order to do this, we set the WriteBarrierKind of the instruction. The instruction's kind is
+// set to kEmitNoNullCheck (if this write barrier coalesced other write barriers, we don't want to
+// perform the null check optimization), or to kDontEmit (if the write barrier as a whole is not
+// needed).
+class WriteBarrierElimination : public HOptimization {
+ public:
+ WriteBarrierElimination(HGraph* graph,
+ OptimizingCompilerStats* stats,
+ const char* name = kWBEPassName)
+ : HOptimization(graph, name, stats) {}
+
+ bool Run() override;
+
+ static constexpr const char* kWBEPassName = "write_barrier_elimination";
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(WriteBarrierElimination);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_
diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc
index b1abcf6747..e266618980 100644
--- a/compiler/optimizing/x86_memory_gen.cc
+++ b/compiler/optimizing/x86_memory_gen.cc
@@ -18,13 +18,13 @@
 #include "code_generator.h"
 #include "driver/compiler_options.h"
-namespace art {
+namespace art HIDDEN {
 namespace x86 {
 /**
  * Replace instructions with memory operand forms.
  */
-class MemoryOperandVisitor : public HGraphVisitor {
+class MemoryOperandVisitor final : public HGraphVisitor {
 public:
 MemoryOperandVisitor(HGraph* graph, bool do_implicit_null_checks)
 : HGraphVisitor(graph),
diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h
index 3f4178d58a..1cae1a5d3a 100644
--- a/compiler/optimizing/x86_memory_gen.h
+++ b/compiler/optimizing/x86_memory_gen.h
@@ -17,10 +17,11 @@
 #ifndef ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
 #define ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
+#include "base/macros.h"
 #include "nodes.h"
 #include "optimization.h"
-namespace art {
+namespace art HIDDEN {
 class CodeGenerator;
 namespace x86 {
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index 0aaeaa5b4f..a122d3c9d3 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -38,7 +38,7 @@
 #define __ assembler.
-namespace art { +namespace art HIDDEN { #ifdef ART_ENABLE_CODEGEN_arm namespace arm { @@ -208,6 +208,8 @@ std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet is return x86::CreateTrampoline(&allocator, offset); #endif default: + UNUSED(abi); + UNUSED(offset); LOG(FATAL) << "Unexpected InstructionSet: " << isa; UNREACHABLE(); } diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h index f0086b58d5..32e35ae1d6 100644 --- a/compiler/trampolines/trampoline_compiler.h +++ b/compiler/trampolines/trampoline_compiler.h @@ -22,9 +22,10 @@ #include <vector> #include "arch/instruction_set.h" +#include "base/macros.h" #include "offsets.h" -namespace art { +namespace art HIDDEN { enum EntryPointCallingConvention { // ABI of invocations to a method's interpreter entry point. @@ -36,12 +37,10 @@ enum EntryPointCallingConvention { }; // Create code that will invoke the function held in thread local storage. -std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet isa, - EntryPointCallingConvention abi, - ThreadOffset32 entry_point_offset); -std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet isa, - EntryPointCallingConvention abi, - ThreadOffset64 entry_point_offset); +EXPORT std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32( + InstructionSet isa, EntryPointCallingConvention abi, ThreadOffset32 entry_point_offset); +EXPORT std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64( + InstructionSet isa, EntryPointCallingConvention abi, ThreadOffset64 entry_point_offset); } // namespace art diff --git a/compiler/utils/arm/assembler_arm_shared.h b/compiler/utils/arm/assembler_arm_shared.h deleted file mode 100644 index 7464052d93..0000000000 --- a/compiler/utils/arm/assembler_arm_shared.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_SHARED_H_ -#define ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_SHARED_H_ - -namespace art { -namespace arm { - -enum LoadOperandType { - kLoadSignedByte, - kLoadUnsignedByte, - kLoadSignedHalfword, - kLoadUnsignedHalfword, - kLoadWord, - kLoadWordPair, - kLoadSWord, - kLoadDWord -}; - -enum StoreOperandType { - kStoreByte, - kStoreHalfword, - kStoreWord, - kStoreWordPair, - kStoreSWord, - kStoreDWord -}; - -} // namespace arm -} // namespace art - -#endif // ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_SHARED_H_ diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc index 77f5d7081a..c7ca003530 100644 --- a/compiler/utils/arm/assembler_arm_vixl.cc +++ b/compiler/utils/arm/assembler_arm_vixl.cc @@ -26,7 +26,7 @@ using namespace vixl::aarch32; // NOLINT(build/namespaces) -namespace art { +namespace art HIDDEN { namespace arm { #ifdef ___ @@ -81,9 +81,7 @@ void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) { } void ArmVIXLAssembler::GenerateMarkingRegisterCheck(vixl32::Register temp, int code) { - // The Marking Register is only used in the Baker read barrier configuration. - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); + DCHECK(kReserveMarkingRegister); vixl32::Label mr_is_ok; diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h index 5bc8a70280..741119d7f7 100644 --- a/compiler/utils/arm/assembler_arm_vixl.h +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -19,15 +19,12 @@ #include <android-base/logging.h> -#include "base/arena_containers.h" #include "base/macros.h" #include "constants_arm.h" #include "dwarf/register.h" #include "offsets.h" -#include "utils/arm/assembler_arm_shared.h" #include "utils/arm/managed_register_arm.h" #include "utils/assembler.h" -#include "utils/jni_macro_assembler.h" // TODO(VIXL): Make VIXL compile with -Wshadow and remove pragmas. 
#pragma GCC diagnostic push @@ -37,7 +34,7 @@ namespace vixl32 = vixl::aarch32; -namespace art { +namespace art HIDDEN { namespace arm { inline dwarf::Reg DWARFReg(vixl32::Register reg) { @@ -48,6 +45,26 @@ inline dwarf::Reg DWARFReg(vixl32::SRegister reg) { return dwarf::Reg::ArmFp(static_cast<int>(reg.GetCode())); } +enum LoadOperandType { + kLoadSignedByte, + kLoadUnsignedByte, + kLoadSignedHalfword, + kLoadUnsignedHalfword, + kLoadWord, + kLoadWordPair, + kLoadSWord, + kLoadDWord +}; + +enum StoreOperandType { + kStoreByte, + kStoreHalfword, + kStoreWord, + kStoreWordPair, + kStoreSWord, + kStoreDWord +}; + class ArmVIXLMacroAssembler final : public vixl32::MacroAssembler { public: // Most methods fit in a 1KB code buffer, which results in more optimal alloc/realloc and diff --git a/compiler/utils/arm/constants_arm.cc b/compiler/utils/arm/constants_arm.cc index b02b343b26..a927fc201a 100644 --- a/compiler/utils/arm/constants_arm.cc +++ b/compiler/utils/arm/constants_arm.cc @@ -16,7 +16,7 @@ #include "constants_arm.h" -namespace art { +namespace art HIDDEN { namespace arm { std::ostream& operator<<(std::ostream& os, const DRegister& rhs) { diff --git a/compiler/utils/arm/constants_arm.h b/compiler/utils/arm/constants_arm.h index f42fd9777b..ef6d48dd3b 100644 --- a/compiler/utils/arm/constants_arm.h +++ b/compiler/utils/arm/constants_arm.h @@ -26,8 +26,9 @@ #include "arch/arm/registers_arm.h" #include "base/casts.h" #include "base/globals.h" +#include "base/macros.h" -namespace art { +namespace art HIDDEN { namespace arm { // Defines constants and accessor classes to assemble, disassemble and diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc index 6e6d40dc92..54873454eb 100644 --- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc @@ -20,6 +20,7 @@ #include <type_traits> #include "entrypoints/quick/quick_entrypoints.h" +#include "indirect_reference_table.h" #include "lock_word.h" #include "thread.h" @@ -27,9 +28,8 @@ using namespace vixl::aarch32; // NOLINT(build/namespaces) namespace vixl32 = vixl::aarch32; using vixl::ExactAssemblyScope; -using vixl::CodeBufferCheckScope; -namespace art { +namespace art HIDDEN { namespace arm { #ifdef ___ @@ -155,7 +155,7 @@ void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size, // Pop LR to PC unless we need to emit some read barrier code just before returning. bool emit_code_before_return = - (kEmitCompilerReadBarrier && kUseBakerReadBarrier) && + (gUseReadBarrier && kUseBakerReadBarrier) && (may_suspend || (kIsDebugBuild && emit_run_time_checks_in_debug_mode_)); if ((core_spill_mask & (1u << lr.GetCode())) != 0u && !emit_code_before_return) { DCHECK_EQ(core_spill_mask & (1u << pc.GetCode()), 0u); @@ -215,7 +215,9 @@ void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size, } } - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Emit marking register refresh even with all GCs as we are still using the + // register due to nterp's dependency. + if (kReserveMarkingRegister) { if (may_suspend) { // The method may be suspended; refresh the Marking Register. 
___ Ldr(mr, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value())); @@ -305,13 +307,6 @@ void ArmVIXLJNIMacroAssembler::Store(ManagedRegister m_base, } } -void ArmVIXLJNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) { - vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm()); - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(src); - asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value()); -} - void ArmVIXLJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) { vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); @@ -319,70 +314,6 @@ void ArmVIXLJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msr asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value()); } -void ArmVIXLJNIMacroAssembler::StoreSpanning(FrameOffset dest, - ManagedRegister msrc, - FrameOffset in_off) { - vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm()); - asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value()); - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - vixl32::Register scratch = temps.Acquire(); - asm_.LoadFromOffset(kLoadWord, scratch, sp, in_off.Int32Value()); - asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value() + 4); -} - -void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) { - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - vixl32::Register scratch = temps.Acquire(); - asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value()); - asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value()); -} - -void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest, - ManagedRegister base, - MemberOffset offs, - bool unpoison_reference) { - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - vixl32::Register scratch = temps.Acquire(); - asm_.LoadFromOffset(kLoadWord, scratch, AsVIXLRegister(base.AsArm()), offs.Int32Value()); - if (unpoison_reference) { - asm_.MaybeUnpoisonHeapReference(scratch); - } - asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value()); -} - -void ArmVIXLJNIMacroAssembler::LoadRef(ManagedRegister mdest, - ManagedRegister mbase, - MemberOffset offs, - bool unpoison_reference) { - vixl::aarch32::Register dest = AsVIXLRegister(mdest.AsArm()); - vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm()); - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(dest, base); - asm_.LoadFromOffset(kLoadWord, dest, base, offs.Int32Value()); - - if (unpoison_reference) { - asm_.MaybeUnpoisonHeapReference(dest); - } -} - -void ArmVIXLJNIMacroAssembler::LoadRef(ManagedRegister dest ATTRIBUTE_UNUSED, - FrameOffset src ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void ArmVIXLJNIMacroAssembler::LoadRawPtr(ManagedRegister dest ATTRIBUTE_UNUSED, - ManagedRegister base ATTRIBUTE_UNUSED, - Offset offs ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void ArmVIXLJNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm) { - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - vixl32::Register scratch = temps.Acquire(); - asm_.LoadImmediate(scratch, imm); - asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value()); -} - void ArmVIXLJNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) { return Load(m_dst.AsArm(), sp, src.Int32Value(), size); } @@ -394,11 +325,6 @@ void ArmVIXLJNIMacroAssembler::Load(ManagedRegister m_dst, return Load(m_dst.AsArm(), AsVIXLRegister(m_base.AsArm()), offs.Int32Value(), size); } 
-void ArmVIXLJNIMacroAssembler::LoadFromThread(ManagedRegister m_dst, - ThreadOffset32 src, - size_t size) { - return Load(m_dst.AsArm(), tr, src.Int32Value(), size); -} void ArmVIXLJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) { vixl::aarch32::Register dest = AsVIXLRegister(mdest.AsArm()); @@ -407,29 +333,15 @@ void ArmVIXLJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, Threa asm_.LoadFromOffset(kLoadWord, dest, tr, offs.Int32Value()); } -void ArmVIXLJNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) { - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - vixl32::Register scratch = temps.Acquire(); - asm_.LoadFromOffset(kLoadWord, scratch, tr, thr_offs.Int32Value()); - asm_.StoreToOffset(kStoreWord, scratch, sp, fr_offs.Int32Value()); -} - -void ArmVIXLJNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs ATTRIBUTE_UNUSED, - FrameOffset fr_offs ATTRIBUTE_UNUSED, - ManagedRegister mscratch ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void ArmVIXLJNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs, - FrameOffset fr_offs) { - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - vixl32::Register scratch = temps.Acquire(); - asm_.AddConstant(scratch, sp, fr_offs.Int32Value()); - asm_.StoreToOffset(kStoreWord, scratch, tr, thr_offs.Int32Value()); -} - -void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) { - asm_.StoreToOffset(kStoreWord, sp, tr, thr_offs.Int32Value()); +void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs, bool tag_sp) { + if (tag_sp) { + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + vixl32::Register reg = temps.Acquire(); + ___ Orr(reg, sp, 0x2); + asm_.StoreToOffset(kStoreWord, reg, tr, thr_offs.Int32Value()); + } else { + asm_.StoreToOffset(kStoreWord, sp, tr, thr_offs.Int32Value()); + } } void ArmVIXLJNIMacroAssembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, @@ -869,6 +781,11 @@ void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst, } } +void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst, size_t value) { + ArmManagedRegister dst = mdst.AsArm(); + ___ Mov(AsVIXLRegister(dst), static_cast<uint32_t>(value)); +} + void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) { DCHECK(size == 4 || size == 8) << size; UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); @@ -884,48 +801,6 @@ void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t si } } -void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED, - ManagedRegister src_base ATTRIBUTE_UNUSED, - Offset src_offset ATTRIBUTE_UNUSED, - ManagedRegister mscratch ATTRIBUTE_UNUSED, - size_t size ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void ArmVIXLJNIMacroAssembler::Copy(ManagedRegister dest_base ATTRIBUTE_UNUSED, - Offset dest_offset ATTRIBUTE_UNUSED, - FrameOffset src ATTRIBUTE_UNUSED, - ManagedRegister mscratch ATTRIBUTE_UNUSED, - size_t size ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dst ATTRIBUTE_UNUSED, - FrameOffset src_base ATTRIBUTE_UNUSED, - Offset src_offset ATTRIBUTE_UNUSED, - ManagedRegister mscratch ATTRIBUTE_UNUSED, - size_t size ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void ArmVIXLJNIMacroAssembler::Copy(ManagedRegister dest ATTRIBUTE_UNUSED, - Offset dest_offset ATTRIBUTE_UNUSED, - ManagedRegister src ATTRIBUTE_UNUSED, - Offset src_offset ATTRIBUTE_UNUSED, - 
ManagedRegister mscratch ATTRIBUTE_UNUSED, - size_t size ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dst ATTRIBUTE_UNUSED, - Offset dest_offset ATTRIBUTE_UNUSED, - FrameOffset src ATTRIBUTE_UNUSED, - Offset src_offset ATTRIBUTE_UNUSED, - ManagedRegister scratch ATTRIBUTE_UNUSED, - size_t size ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - void ArmVIXLJNIMacroAssembler::CreateJObject(ManagedRegister mout_reg, FrameOffset spilled_reference_offset, ManagedRegister min_reg, @@ -971,33 +846,19 @@ void ArmVIXLJNIMacroAssembler::CreateJObject(ManagedRegister mout_reg, } } -void ArmVIXLJNIMacroAssembler::CreateJObject(FrameOffset out_off, - FrameOffset spilled_reference_offset, - bool null_allowed) { - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - vixl32::Register scratch = temps.Acquire(); - if (null_allowed) { - asm_.LoadFromOffset(kLoadWord, scratch, sp, spilled_reference_offset.Int32Value()); - // Null values get a jobject value null. Otherwise, the jobject is - // the address of the spilled reference. - // e.g. scratch = (scratch == 0) ? 0 : (SP+spilled_reference_offset) - ___ Cmp(scratch, 0); - - // FIXME: Using 32-bit T32 instruction in IT-block is deprecated. - if (asm_.ShifterOperandCanHold(ADD, spilled_reference_offset.Int32Value())) { - ExactAssemblyScope guard(asm_.GetVIXLAssembler(), - 2 * vixl32::kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - ___ it(ne, 0x8); - asm_.AddConstantInIt(scratch, sp, spilled_reference_offset.Int32Value(), ne); - } else { - // TODO: Implement this (old arm assembler would have crashed here). - UNIMPLEMENTED(FATAL); - } - } else { - asm_.AddConstant(scratch, sp, spilled_reference_offset.Int32Value()); - } - asm_.StoreToOffset(kStoreWord, scratch, sp, out_off.Int32Value()); +void ArmVIXLJNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister mreg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) { + constexpr uint32_t kGlobalOrWeakGlobalMask = + dchecked_integral_cast<uint32_t>(IndirectReferenceTable::GetGlobalOrWeakGlobalMask()); + constexpr uint32_t kIndirectRefKindMask = + dchecked_integral_cast<uint32_t>(IndirectReferenceTable::GetIndirectRefKindMask()); + vixl32::Register reg = AsVIXLRegister(mreg.AsArm()); + ___ Tst(reg, kGlobalOrWeakGlobalMask); + ___ B(ne, ArmVIXLJNIMacroLabel::Cast(slow_path)->AsArm()); + ___ Bics(reg, reg, kIndirectRefKindMask); + ___ B(eq, ArmVIXLJNIMacroLabel::Cast(resume)->AsArm()); // Skip load for null. + ___ Ldr(reg, MemOperand(reg)); } void ArmVIXLJNIMacroAssembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED, @@ -1165,7 +1026,7 @@ void ArmVIXLJNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnary UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); vixl32::Register test_reg; DCHECK_EQ(Thread::IsGcMarkingSize(), 4u); - DCHECK(kUseReadBarrier); + DCHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // TestGcMarking() is used in the JNI stub entry when the marking register is up to date. 
if (kIsDebugBuild && emit_run_time_checks_in_debug_mode_) { @@ -1213,15 +1074,19 @@ void ArmVIXLJNIMacroAssembler::TestMarkBit(ManagedRegister mref, } } +void ArmVIXLJNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) { + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + vixl32::Register scratch = temps.Acquire(); + ___ Mov(scratch, static_cast<uint32_t>(address)); + ___ Ldrb(scratch, MemOperand(scratch, 0)); + ___ CompareAndBranchIfNonZero(scratch, ArmVIXLJNIMacroLabel::Cast(label)->AsArm()); +} + void ArmVIXLJNIMacroAssembler::Bind(JNIMacroLabel* label) { CHECK(label != nullptr); ___ Bind(ArmVIXLJNIMacroLabel::Cast(label)->AsArm()); } -void ArmVIXLJNIMacroAssembler::MemoryBarrier(ManagedRegister scratch ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - void ArmVIXLJNIMacroAssembler::Load(ArmManagedRegister dest, vixl32::Register base, int32_t offset, @@ -1243,6 +1108,8 @@ void ArmVIXLJNIMacroAssembler::Load(ArmManagedRegister dest, } } else if (dest.IsRegisterPair()) { CHECK_EQ(8u, size) << dest; + // TODO: Use LDRD to improve stubs for @CriticalNative methods with parameters + // (long, long, ...). A single 32-bit LDRD is presumably faster than two 16-bit LDRs. ___ Ldr(AsVIXLRegisterPairLow(dest), MemOperand(base, offset)); ___ Ldr(AsVIXLRegisterPairHigh(dest), MemOperand(base, offset + 4)); } else if (dest.IsSRegister()) { diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h index ed453ae8ff..f6df7f2c53 100644 --- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h @@ -23,13 +23,12 @@ #include "base/macros.h" #include "constants_arm.h" #include "offsets.h" -#include "utils/arm/assembler_arm_shared.h" #include "utils/arm/assembler_arm_vixl.h" #include "utils/arm/managed_register_arm.h" #include "utils/assembler.h" #include "utils/jni_macro_assembler.h" -namespace art { +namespace art HIDDEN { namespace arm { class ArmVIXLJNIMacroAssembler final @@ -63,34 +62,14 @@ class ArmVIXLJNIMacroAssembler final // Store routines. void Store(FrameOffset offs, ManagedRegister src, size_t size) override; void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override; - void StoreRef(FrameOffset dest, ManagedRegister src) override; void StoreRawPtr(FrameOffset dest, ManagedRegister src) override; - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override; - - void StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) override; - - void StoreStackPointerToThread(ThreadOffset32 thr_offs) override; - - void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override; + void StoreStackPointerToThread(ThreadOffset32 thr_offs, bool tag_sp) override; // Load routines. void Load(ManagedRegister dest, FrameOffset src, size_t size) override; void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override; - void LoadFromThread(ManagedRegister dest, - ThreadOffset32 src, - size_t size) override; - - void LoadRef(ManagedRegister dest, FrameOffset src) override; - - void LoadRef(ManagedRegister dest, - ManagedRegister base, - MemberOffset offs, - bool unpoison_reference) override; - - void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) override; - void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) override; // Copying routines. 
@@ -100,51 +79,7 @@ class ArmVIXLJNIMacroAssembler final void Move(ManagedRegister dest, ManagedRegister src, size_t size) override; - void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) override; - - void CopyRawPtrToThread(ThreadOffset32 thr_offs, - FrameOffset fr_offs, - ManagedRegister scratch) override; - - void CopyRef(FrameOffset dest, FrameOffset src) override; - void CopyRef(FrameOffset dest, - ManagedRegister base, - MemberOffset offs, - bool unpoison_reference) override; - - void Copy(FrameOffset dest, FrameOffset src, size_t size) override; - - void Copy(FrameOffset dest, - ManagedRegister src_base, - Offset src_offset, - ManagedRegister scratch, - size_t size) override; - - void Copy(ManagedRegister dest_base, - Offset dest_offset, - FrameOffset src, - ManagedRegister scratch, - size_t size) override; - - void Copy(FrameOffset dest, - FrameOffset src_base, - Offset src_offset, - ManagedRegister scratch, - size_t size) override; - - void Copy(ManagedRegister dest, - Offset dest_offset, - ManagedRegister src, - Offset src_offset, - ManagedRegister scratch, - size_t size) override; - - void Copy(FrameOffset dest, - Offset dest_offset, - FrameOffset src, - Offset src_offset, - ManagedRegister scratch, - size_t size) override; + void Move(ManagedRegister dest, size_t value) override; // Sign extension. void SignExtend(ManagedRegister mreg, size_t size) override; @@ -156,20 +91,10 @@ class ArmVIXLJNIMacroAssembler final void GetCurrentThread(ManagedRegister dest) override; void GetCurrentThread(FrameOffset dest_offset) override; - // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value), - // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly - // stale reference that can be used to avoid loading the spilled value to - // see if the value is null. - void CreateJObject(ManagedRegister out_reg, - FrameOffset spilled_reference_offset, - ManagedRegister in_reg, - bool null_allowed) override; - - // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value), - // or to be null if the value is null and `null_allowed`. - void CreateJObject(FrameOffset out_off, - FrameOffset spilled_reference_offset, - bool null_allowed) override; + // Decode JNI transition or local `jobject`. For (weak) global `jobject`, jump to slow path. + void DecodeJNITransitionOrLocalJObject(ManagedRegister reg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) override; // Heap::VerifyObject on src. In some cases (such as a reference to this) we // know that src may not be null. @@ -213,17 +138,28 @@ class ArmVIXLJNIMacroAssembler final void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override; // Emit a conditional jump to the label by applying a unary condition test to object's mark bit. void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override; + // Emit a conditional jump to label if the loaded value from specified locations is not zero. + void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override; // Code at this offset will serve as the target for the Jump call. 
void Bind(JNIMacroLabel* label) override; - void MemoryBarrier(ManagedRegister scratch) override; - + private: + void Copy(FrameOffset dest, FrameOffset src, size_t size); void Load(ArmManagedRegister dest, vixl32::Register base, int32_t offset, size_t size); - private: + // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value), + // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly + // stale reference that can be used to avoid loading the spilled value to + // see if the value is null. + void CreateJObject(ManagedRegister out_reg, + FrameOffset spilled_reference_offset, + ManagedRegister in_reg, + bool null_allowed); + // Used for testing. - friend class ArmVIXLAssemblerTest_VixlLoadFromOffset_Test; - friend class ArmVIXLAssemblerTest_VixlStoreToOffset_Test; + ART_FRIEND_TEST(ArmVIXLAssemblerTest, VixlJniHelpers); + ART_FRIEND_TEST(ArmVIXLAssemblerTest, VixlLoadFromOffset); + ART_FRIEND_TEST(ArmVIXLAssemblerTest, VixlStoreToOffset); }; class ArmVIXLJNIMacroLabel final diff --git a/compiler/utils/arm/managed_register_arm.cc b/compiler/utils/arm/managed_register_arm.cc index deff658b4f..07d50da910 100644 --- a/compiler/utils/arm/managed_register_arm.cc +++ b/compiler/utils/arm/managed_register_arm.cc @@ -18,7 +18,7 @@ #include "base/globals.h" -namespace art { +namespace art HIDDEN { namespace arm { // Returns true if this managed-register overlaps the other managed-register. diff --git a/compiler/utils/arm/managed_register_arm.h b/compiler/utils/arm/managed_register_arm.h index 6d942fa774..b3d436c10f 100644 --- a/compiler/utils/arm/managed_register_arm.h +++ b/compiler/utils/arm/managed_register_arm.h @@ -19,10 +19,11 @@ #include <android-base/logging.h> +#include "base/macros.h" #include "constants_arm.h" #include "utils/managed_register.h" -namespace art { +namespace art HIDDEN { namespace arm { // Values for register pairs. diff --git a/compiler/utils/arm/managed_register_arm_test.cc b/compiler/utils/arm/managed_register_arm_test.cc index 6f440a7c81..60f6090edd 100644 --- a/compiler/utils/arm/managed_register_arm_test.cc +++ b/compiler/utils/arm/managed_register_arm_test.cc @@ -16,9 +16,10 @@ #include "managed_register_arm.h" #include "base/globals.h" +#include "base/macros.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { namespace arm { TEST(ArmManagedRegister, NoRegister) { diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 6100ed9855..26dce7c502 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -16,7 +16,6 @@ #include "arch/arm64/instruction_set_features_arm64.h" #include "assembler_arm64.h" -#include "base/bit_utils_iterator.h" #include "entrypoints/quick/quick_entrypoints.h" #include "heap_poisoning.h" #include "offsets.h" @@ -24,7 +23,7 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) -namespace art { +namespace art HIDDEN { namespace arm64 { #ifdef ___ @@ -187,9 +186,7 @@ void Arm64Assembler::MaybeUnpoisonHeapReference(Register reg) { } void Arm64Assembler::GenerateMarkingRegisterCheck(Register temp, int code) { - // The Marking Register is only used in the Baker read barrier configuration. - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); + DCHECK(kReserveMarkingRegister); vixl::aarch64::Register mr = reg_x(MR); // Marking Register. vixl::aarch64::Register tr = reg_x(TR); // Thread Register. 
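For reference, a minimal C++ sketch of the decoding that the new DecodeJNITransitionOrLocalJObject emits (the Tst/Bics/Ldr sequence above for ARM, mirrored by a Tbnz/And/Cbz/Ldr sequence for ARM64 below): test the global/weak-global bit and defer to the slow path if it is set, strip the reference-kind bits, keep null as null, otherwise load the spilled reference. The concrete mask values and the helper name below are assumptions for illustration; the generated code takes the real masks from IndirectReferenceTable::GetGlobalOrWeakGlobalMask() and IndirectReferenceTable::GetIndirectRefKindMask().

#include <cstdint>

constexpr uintptr_t kGlobalOrWeakGlobalMask = 0x2u;  // assumed: this bit marks (weak) global refs
constexpr uintptr_t kIndirectRefKindMask = 0x3u;     // assumed: low two bits encode the ref kind

// Returns false when the reference is global or weak global, i.e. when the
// generated stub branches to `slow_path` instead of decoding inline.
inline bool DecodeJniTransitionOrLocalRef(uintptr_t ref, uint32_t* decoded) {
  if ((ref & kGlobalOrWeakGlobalMask) != 0u) {
    return false;                                      // Tst + B(ne, slow_path)
  }
  ref &= ~kIndirectRefKindMask;                        // Bics: strip the kind bits
  if (ref == 0u) {
    *decoded = 0u;                                     // B(eq, resume): null stays null
    return true;
  }
  *decoded = *reinterpret_cast<const uint32_t*>(ref);  // Ldr: load the spilled 32-bit reference
  return true;
}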
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index b49a13a067..f8168903bd 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -23,7 +23,6 @@ #include <android-base/logging.h> -#include "base/arena_containers.h" #include "base/bit_utils_iterator.h" #include "base/macros.h" #include "dwarf/register.h" @@ -38,7 +37,7 @@ #include "aarch64/macro-assembler-aarch64.h" #pragma GCC diagnostic pop -namespace art { +namespace art HIDDEN { class Arm64InstructionSetFeatures; diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc index 50ca468499..9e9f122cf6 100644 --- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc @@ -17,6 +17,7 @@ #include "jni_macro_assembler_arm64.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "indirect_reference_table.h" #include "lock_word.h" #include "managed_register_arm64.h" #include "offsets.h" @@ -24,7 +25,7 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) -namespace art { +namespace art HIDDEN { namespace arm64 { #ifdef ___ @@ -191,46 +192,22 @@ void Arm64JNIMacroAssembler::Store(ManagedRegister m_base, } } -void Arm64JNIMacroAssembler::StoreRef(FrameOffset offs, ManagedRegister m_src) { - Arm64ManagedRegister src = m_src.AsArm64(); - CHECK(src.IsXRegister()) << src; - StoreWToOffset(kStoreWord, src.AsOverlappingWRegister(), SP, - offs.Int32Value()); -} - void Arm64JNIMacroAssembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) { Arm64ManagedRegister src = m_src.AsArm64(); CHECK(src.IsXRegister()) << src; StoreToOffset(src.AsXRegister(), SP, offs.Int32Value()); } -void Arm64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset offs, uint32_t imm) { - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - Register scratch = temps.AcquireW(); - ___ Mov(scratch, imm); - ___ Str(scratch, MEM_OP(reg_x(SP), offs.Int32Value())); -} - -void Arm64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 tr_offs, FrameOffset fr_offs) { - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - Register scratch = temps.AcquireX(); - ___ Add(scratch, reg_x(SP), fr_offs.Int32Value()); - ___ Str(scratch, MEM_OP(reg_x(TR), tr_offs.Int32Value())); -} - -void Arm64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 tr_offs) { +void Arm64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 tr_offs, bool tag_sp) { UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); Register scratch = temps.AcquireX(); ___ Mov(scratch, reg_x(SP)); + if (tag_sp) { + ___ Orr(scratch, scratch, 0x2); + } ___ Str(scratch, MEM_OP(reg_x(TR), tr_offs.Int32Value())); } -void Arm64JNIMacroAssembler::StoreSpanning(FrameOffset dest_off ATTRIBUTE_UNUSED, - ManagedRegister m_source ATTRIBUTE_UNUSED, - FrameOffset in_off ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); // This case is not applicable to ARM64. -} - // Load routines. 
void Arm64JNIMacroAssembler::LoadImmediate(XRegister dest, int32_t value, Condition cond) { if ((cond == al) || (cond == nv)) { @@ -329,45 +306,6 @@ void Arm64JNIMacroAssembler::Load(ManagedRegister m_dst, return Load(m_dst.AsArm64(), m_base.AsArm64().AsXRegister(), offs.Int32Value(), size); } -void Arm64JNIMacroAssembler::LoadFromThread(ManagedRegister m_dst, - ThreadOffset64 src, - size_t size) { - return Load(m_dst.AsArm64(), TR, src.Int32Value(), size); -} - -void Arm64JNIMacroAssembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) { - Arm64ManagedRegister dst = m_dst.AsArm64(); - CHECK(dst.IsXRegister()) << dst; - LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), SP, offs.Int32Value()); -} - -void Arm64JNIMacroAssembler::LoadRef(ManagedRegister m_dst, - ManagedRegister m_base, - MemberOffset offs, - bool unpoison_reference) { - Arm64ManagedRegister dst = m_dst.AsArm64(); - Arm64ManagedRegister base = m_base.AsArm64(); - CHECK(dst.IsXRegister() && base.IsXRegister()); - LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), base.AsXRegister(), - offs.Int32Value()); - if (unpoison_reference) { - WRegister ref_reg = dst.AsOverlappingWRegister(); - asm_.MaybeUnpoisonHeapReference(reg_w(ref_reg)); - } -} - -void Arm64JNIMacroAssembler::LoadRawPtr(ManagedRegister m_dst, - ManagedRegister m_base, - Offset offs) { - Arm64ManagedRegister dst = m_dst.AsArm64(); - Arm64ManagedRegister base = m_base.AsArm64(); - CHECK(dst.IsXRegister() && base.IsXRegister()); - // Remove dst and base form the temp list - higher level API uses IP1, IP0. - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(reg_x(dst.AsXRegister()), reg_x(base.AsXRegister())); - ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value())); -} - void Arm64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset64 offs) { Arm64ManagedRegister dst = m_dst.AsArm64(); CHECK(dst.IsXRegister()) << dst; @@ -640,40 +578,10 @@ void Arm64JNIMacroAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, } } -void Arm64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 tr_offs) { - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - Register scratch = temps.AcquireX(); - ___ Ldr(scratch, MEM_OP(reg_x(TR), tr_offs.Int32Value())); - ___ Str(scratch, MEM_OP(sp, fr_offs.Int32Value())); -} - -void Arm64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 tr_offs, - FrameOffset fr_offs, - ManagedRegister m_scratch) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value()); - StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); -} - -void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) { - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - Register scratch = temps.AcquireW(); - ___ Ldr(scratch, MEM_OP(reg_x(SP), src.Int32Value())); - ___ Str(scratch, MEM_OP(reg_x(SP), dest.Int32Value())); -} - -void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest, - ManagedRegister base, - MemberOffset offs, - bool unpoison_reference) { - UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - Register scratch = temps.AcquireW(); - ___ Ldr(scratch, MEM_OP(reg_x(base.AsArm64().AsXRegister()), offs.Int32Value())); - if (unpoison_reference) { - asm_.MaybeUnpoisonHeapReference(scratch); - } - ___ Str(scratch, MEM_OP(reg_x(SP), dest.Int32Value())); +void Arm64JNIMacroAssembler::Move(ManagedRegister m_dst, size_t 
value) { + Arm64ManagedRegister dst = m_dst.AsArm64(); + DCHECK(dst.IsXRegister()); + ___ Mov(reg_x(dst.AsXRegister()), value); } void Arm64JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) { @@ -684,105 +592,6 @@ void Arm64JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size ___ Str(scratch, MEM_OP(reg_x(SP), dest.Int32Value())); } -void Arm64JNIMacroAssembler::Copy(FrameOffset dest, - ManagedRegister src_base, - Offset src_offset, - ManagedRegister m_scratch, - size_t size) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - Arm64ManagedRegister base = src_base.AsArm64(); - CHECK(base.IsXRegister()) << base; - CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; - CHECK(size == 4 || size == 8) << size; - if (size == 4) { - LoadWFromOffset(kLoadWord, scratch.AsWRegister(), base.AsXRegister(), - src_offset.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsWRegister(), SP, dest.Int32Value()); - } else if (size == 8) { - LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), src_offset.Int32Value()); - StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value()); - } else { - UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; - } -} - -void Arm64JNIMacroAssembler::Copy(ManagedRegister m_dest_base, - Offset dest_offs, - FrameOffset src, - ManagedRegister m_scratch, - size_t size) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - Arm64ManagedRegister base = m_dest_base.AsArm64(); - CHECK(base.IsXRegister()) << base; - CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; - CHECK(size == 4 || size == 8) << size; - if (size == 4) { - LoadWFromOffset(kLoadWord, scratch.AsWRegister(), SP, src.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsWRegister(), base.AsXRegister(), - dest_offs.Int32Value()); - } else if (size == 8) { - LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value()); - StoreToOffset(scratch.AsXRegister(), base.AsXRegister(), dest_offs.Int32Value()); - } else { - UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; - } -} - -void Arm64JNIMacroAssembler::Copy(FrameOffset /*dst*/, - FrameOffset /*src_base*/, - Offset /*src_offset*/, - ManagedRegister /*mscratch*/, - size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant"; -} - -void Arm64JNIMacroAssembler::Copy(ManagedRegister m_dest, - Offset dest_offset, - ManagedRegister m_src, - Offset src_offset, - ManagedRegister m_scratch, - size_t size) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - Arm64ManagedRegister src = m_src.AsArm64(); - Arm64ManagedRegister dest = m_dest.AsArm64(); - CHECK(dest.IsXRegister()) << dest; - CHECK(src.IsXRegister()) << src; - CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; - CHECK(size == 4 || size == 8) << size; - if (size == 4) { - if (scratch.IsWRegister()) { - LoadWFromOffset(kLoadWord, scratch.AsWRegister(), src.AsXRegister(), - src_offset.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsWRegister(), dest.AsXRegister(), - dest_offset.Int32Value()); - } else { - LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), src.AsXRegister(), - src_offset.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), dest.AsXRegister(), - dest_offset.Int32Value()); - } - } else if (size == 8) { - LoadFromOffset(scratch.AsXRegister(), src.AsXRegister(), src_offset.Int32Value()); - StoreToOffset(scratch.AsXRegister(), dest.AsXRegister(), dest_offset.Int32Value()); - } else { - UNIMPLEMENTED(FATAL) << "We only support 
Copy() of size 4 and 8"; - } -} - -void Arm64JNIMacroAssembler::Copy(FrameOffset /*dst*/, - Offset /*dest_offset*/, - FrameOffset /*src*/, - Offset /*src_offset*/, - ManagedRegister /*scratch*/, - size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant"; -} - -void Arm64JNIMacroAssembler::MemoryBarrier(ManagedRegister m_scratch ATTRIBUTE_UNUSED) { - // TODO: Should we check that m_scratch is IP? - see arm. - ___ Dmb(InnerShareable, BarrierAll); -} - void Arm64JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) { Arm64ManagedRegister reg = mreg.AsArm64(); CHECK(size == 1 || size == 2) << size; @@ -882,6 +691,19 @@ void Arm64JNIMacroAssembler::CreateJObject(FrameOffset out_off, ___ Str(scratch, MEM_OP(reg_x(SP), out_off.Int32Value())); } +void Arm64JNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister m_reg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) { + constexpr uint64_t kGlobalOrWeakGlobalMask = IndirectReferenceTable::GetGlobalOrWeakGlobalMask(); + constexpr uint64_t kIndirectRefKindMask = IndirectReferenceTable::GetIndirectRefKindMask(); + constexpr size_t kGlobalOrWeakGlobalBit = WhichPowerOf2(kGlobalOrWeakGlobalMask); + Register reg = reg_w(m_reg.AsArm64().AsWRegister()); + ___ Tbnz(reg.X(), kGlobalOrWeakGlobalBit, Arm64JNIMacroLabel::Cast(slow_path)->AsArm64()); + ___ And(reg.X(), reg.X(), ~kIndirectRefKindMask); + ___ Cbz(reg.X(), Arm64JNIMacroLabel::Cast(resume)->AsArm64()); // Skip load for null. + ___ Ldr(reg, MEM_OP(reg.X())); +} + void Arm64JNIMacroAssembler::TryToTransitionFromRunnableToNative( JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED) { constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); @@ -989,7 +811,7 @@ void Arm64JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCo UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); Register test_reg; DCHECK_EQ(Thread::IsGcMarkingSize(), 4u); - DCHECK(kUseReadBarrier); + DCHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // TestGcMarking() is used in the JNI stub entry when the marking register is up to date. if (kIsDebugBuild && emit_run_time_checks_in_debug_mode_) { @@ -1037,6 +859,14 @@ void Arm64JNIMacroAssembler::TestMarkBit(ManagedRegister m_ref, } } +void Arm64JNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) { + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + Register scratch = temps.AcquireX(); + ___ Mov(scratch, address); + ___ Ldrb(scratch.W(), MEM_OP(scratch, 0)); + ___ Cbnz(scratch.W(), Arm64JNIMacroLabel::Cast(label)->AsArm64()); +} + void Arm64JNIMacroAssembler::Bind(JNIMacroLabel* label) { CHECK(label != nullptr); ___ Bind(Arm64JNIMacroLabel::Cast(label)->AsArm64()); @@ -1107,7 +937,9 @@ void Arm64JNIMacroAssembler::RemoveFrame(size_t frame_size, asm_.UnspillRegisters(core_reg_list, frame_size - core_reg_size); asm_.UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Emit marking register refresh even with all GCs as we are still using the + // register due to nterp's dependency. + if (kReserveMarkingRegister) { vixl::aarch64::Register mr = reg_x(MR); // Marking Register. vixl::aarch64::Register tr = reg_x(TR); // Thread Register. 
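For reference, a minimal C++ sketch of the stack-pointer tagging introduced by StoreStackPointerToThread(thr_offs, tag_sp) in this file (the Orr(scratch, scratch, 0x2) above): when tag_sp is true, bit 1 of the saved stack pointer records how the transition frame was produced so that a later stack walk can tell the cases apart. The helper names below and the assumption that a reader simply masks the bit off are illustrative only.

#include <cstdint>

constexpr uintptr_t kSpTagBit = 0x2u;  // mirrors the Orr(scratch, scratch, 0x2) above

inline uintptr_t TaggedTopQuickFrame(uintptr_t sp, bool tag_sp) {
  return tag_sp ? (sp | kSpTagBit) : sp;  // value written at the Thread's top-quick-frame offset
}

inline bool StoredSpIsTagged(uintptr_t stored_sp) {
  return (stored_sp & kSpTagBit) != 0u;
}

inline uintptr_t StoredSpAsAddress(uintptr_t stored_sp) {
  return stored_sp & ~kSpTagBit;  // strip the tag before using the value as a frame address
}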
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h index 2c04184848..2836e0947d 100644 --- a/compiler/utils/arm64/jni_macro_assembler_arm64.h +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h @@ -37,7 +37,7 @@ #include "aarch64/macro-assembler-aarch64.h" #pragma GCC diagnostic pop -namespace art { +namespace art HIDDEN { namespace arm64 { class Arm64JNIMacroAssembler final : public JNIMacroAssemblerFwd<Arm64Assembler, PointerSize::k64> { @@ -68,23 +68,12 @@ class Arm64JNIMacroAssembler final : public JNIMacroAssemblerFwd<Arm64Assembler, // Store routines. void Store(FrameOffset offs, ManagedRegister src, size_t size) override; void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override; - void StoreRef(FrameOffset dest, ManagedRegister src) override; void StoreRawPtr(FrameOffset dest, ManagedRegister src) override; - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override; - void StoreStackOffsetToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs) override; - void StoreStackPointerToThread(ThreadOffset64 thr_offs) override; - void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override; + void StoreStackPointerToThread(ThreadOffset64 thr_offs, bool tag_sp) override; // Load routines. void Load(ManagedRegister dest, FrameOffset src, size_t size) override; void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override; - void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) override; - void LoadRef(ManagedRegister dest, FrameOffset src) override; - void LoadRef(ManagedRegister dest, - ManagedRegister base, - MemberOffset offs, - bool unpoison_reference) override; - void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) override; void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) override; // Copying routines. @@ -92,43 +81,7 @@ class Arm64JNIMacroAssembler final : public JNIMacroAssemblerFwd<Arm64Assembler, ArrayRef<ArgumentLocation> srcs, ArrayRef<FrameOffset> refs) override; void Move(ManagedRegister dest, ManagedRegister src, size_t size) override; - void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) override; - void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch) - override; - void CopyRef(FrameOffset dest, FrameOffset src) override; - void CopyRef(FrameOffset dest, - ManagedRegister base, - MemberOffset offs, - bool unpoison_reference) override; - void Copy(FrameOffset dest, FrameOffset src, size_t size) override; - void Copy(FrameOffset dest, - ManagedRegister src_base, - Offset src_offset, - ManagedRegister scratch, - size_t size) override; - void Copy(ManagedRegister dest_base, - Offset dest_offset, - FrameOffset src, - ManagedRegister scratch, - size_t size) override; - void Copy(FrameOffset dest, - FrameOffset src_base, - Offset src_offset, - ManagedRegister scratch, - size_t size) override; - void Copy(ManagedRegister dest, - Offset dest_offset, - ManagedRegister src, - Offset src_offset, - ManagedRegister scratch, - size_t size) override; - void Copy(FrameOffset dest, - Offset dest_offset, - FrameOffset src, - Offset src_offset, - ManagedRegister scratch, - size_t size) override; - void MemoryBarrier(ManagedRegister scratch) override; + void Move(ManagedRegister dest, size_t value) override; // Sign extension. 
void SignExtend(ManagedRegister mreg, size_t size) override; @@ -140,20 +93,10 @@ class Arm64JNIMacroAssembler final : public JNIMacroAssemblerFwd<Arm64Assembler, void GetCurrentThread(ManagedRegister dest) override; void GetCurrentThread(FrameOffset dest_offset) override; - // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value), - // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly - // stale reference that can be used to avoid loading the spilled value to - // see if the value is null. - void CreateJObject(ManagedRegister out_reg, - FrameOffset spilled_reference_offset, - ManagedRegister in_reg, - bool null_allowed) override; - - // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value), - // or to be null if the value is null and `null_allowed`. - void CreateJObject(FrameOffset out_off, - FrameOffset spilled_reference_offset, - bool null_allowed) override; + // Decode JNI transition or local `jobject`. For (weak) global `jobject`, jump to slow path. + void DecodeJNITransitionOrLocalJObject(ManagedRegister reg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) override; // Heap::VerifyObject on src. In some cases (such as a reference to this) we // know that src may not be null. @@ -197,6 +140,8 @@ class Arm64JNIMacroAssembler final : public JNIMacroAssemblerFwd<Arm64Assembler, void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override; // Emit a conditional jump to the label by applying a unary condition test to object's mark bit. void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override; + // Emit a conditional jump to label if the loaded value from specified locations is not zero. + void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override; // Code at this offset will serve as the target for the Jump call. void Bind(JNIMacroLabel* label) override; @@ -220,6 +165,24 @@ class Arm64JNIMacroAssembler final : public JNIMacroAssemblerFwd<Arm64Assembler, void LoadFromOffset(XRegister dest, XRegister base, int32_t offset); void LoadSFromOffset(SRegister dest, XRegister base, int32_t offset); void LoadDFromOffset(DRegister dest, XRegister base, int32_t offset); + + void Copy(FrameOffset dest, FrameOffset src, size_t size); + + // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value), + // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly + // stale reference that can be used to avoid loading the spilled value to + // see if the value is null. + void CreateJObject(ManagedRegister out_reg, + FrameOffset spilled_reference_offset, + ManagedRegister in_reg, + bool null_allowed); + + // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value), + // or to be null if the value is null and `null_allowed`. 
+ void CreateJObject(FrameOffset out_off, + FrameOffset spilled_reference_offset, + bool null_allowed); + void AddConstant(XRegister rd, int32_t value, vixl::aarch64::Condition cond = vixl::aarch64::al); diff --git a/compiler/utils/arm64/managed_register_arm64.cc b/compiler/utils/arm64/managed_register_arm64.cc index 5632265646..74a35452db 100644 --- a/compiler/utils/arm64/managed_register_arm64.cc +++ b/compiler/utils/arm64/managed_register_arm64.cc @@ -17,7 +17,7 @@ #include "managed_register_arm64.h" #include "base/globals.h" -namespace art { +namespace art HIDDEN { namespace arm64 { // TODO: Define convention diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h index 8a06f631a1..7e8c976b23 100644 --- a/compiler/utils/arm64/managed_register_arm64.h +++ b/compiler/utils/arm64/managed_register_arm64.h @@ -20,9 +20,10 @@ #include <android-base/logging.h> #include "arch/arm64/registers_arm64.h" +#include "base/macros.h" #include "utils/managed_register.h" -namespace art { +namespace art HIDDEN { namespace arm64 { const int kNumberOfXRegIds = kNumberOfXRegisters; diff --git a/compiler/utils/arm64/managed_register_arm64_test.cc b/compiler/utils/arm64/managed_register_arm64_test.cc index d151ac99e7..f250360639 100644 --- a/compiler/utils/arm64/managed_register_arm64_test.cc +++ b/compiler/utils/arm64/managed_register_arm64_test.cc @@ -18,9 +18,10 @@ #include "assembler_arm64.h" #include "base/globals.h" +#include "base/macros.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { namespace arm64 { TEST(Arm64ManagedRegister, NoRegister) { diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index d1d2a3d556..b82f0dc4b4 100644 --- a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -23,7 +23,7 @@ #include "base/globals.h" #include "base/memory_region.h" -namespace art { +namespace art HIDDEN { AssemblerBuffer::AssemblerBuffer(ArenaAllocator* allocator) : allocator_(allocator) { diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 4b4fb14df6..13a5d9fd01 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -37,7 +37,7 @@ #include "x86/constants_x86.h" #include "x86_64/constants_x86_64.h" -namespace art { +namespace art HIDDEN { class Assembler; class AssemblerBuffer; diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index bb22fe5bde..d03e5a7abc 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -26,11 +26,12 @@ #include <fstream> #include <iterator> +#include "base/macros.h" #include "base/malloc_arena_pool.h" #include "assembler_test_base.h" #include "common_runtime_test.h" // For ScratchFile -namespace art { +namespace art HIDDEN { // Helper for a constexpr string length. 
constexpr size_t ConstexprStrLen(char const* str, size_t count = 0) { @@ -59,7 +60,7 @@ class AssemblerTest : public AssemblerTestBase { return assembler_.get(); } - typedef std::string (*TestFn)(AssemblerTest* assembler_test, Ass* assembler); + using TestFn = std::string (*)(AssemblerTest *, Ass *); void DriverFn(TestFn f, const std::string& test_name) { DriverWrapper(f(this, assembler_.get()), test_name); @@ -259,7 +260,7 @@ class AssemblerTest : public AssemblerTestBase { std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), std::string (AssemblerTest::*GetName3)(const Reg3&), - std::string fmt, + const std::string& fmt, int bias) { std::string str; std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)); diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h index bf73808603..73f3657413 100644 --- a/compiler/utils/assembler_test_base.h +++ b/compiler/utils/assembler_test_base.h @@ -26,6 +26,7 @@ #include "android-base/strings.h" +#include "base/macros.h" #include "base/os.h" #include "base/utils.h" #include "common_runtime_test.h" // For ScratchDir. @@ -34,7 +35,7 @@ #include "exec_utils.h" #include "stream/file_output_stream.h" -namespace art { +namespace art HIDDEN { // If you want to take a look at the differences between the ART assembler and clang, // set this flag to true. The disassembled files will then remain in the tmp directory. @@ -59,7 +60,7 @@ class AssemblerTestBase : public testing::Test { // This is intended to be run as a test. bool CheckTools() { - for (auto cmd : { GetAssemblerCommand()[0], GetDisassemblerCommand()[0] }) { + for (const std::string& cmd : { GetAssemblerCommand()[0], GetDisassemblerCommand()[0] }) { if (!OS::FileExists(cmd.c_str())) { LOG(ERROR) << "Could not find " << cmd; return false; @@ -84,7 +85,7 @@ class AssemblerTestBase : public testing::Test { // Assemble reference object file. std::string ref_obj_file = test_path(".ref.o"); - ASSERT_TRUE(Assemble(ref_asm_file.c_str(), ref_obj_file.c_str())); + ASSERT_TRUE(Assemble(ref_asm_file, ref_obj_file)); // Read the code produced by assembler from the ELF file. 
std::vector<uint8_t> ref_code; @@ -153,9 +154,14 @@ class AssemblerTestBase : public testing::Test { virtual std::vector<std::string> GetDisassemblerCommand() { switch (GetIsa()) { case InstructionSet::kThumb2: - return {FindTool("llvm-objdump"), "--disassemble", "--triple", "thumbv7a-linux-gnueabi"}; + return {FindTool("llvm-objdump"), + "--disassemble", + "--no-print-imm-hex", + "--triple", + "thumbv7a-linux-gnueabi"}; default: - return {FindTool("llvm-objdump"), "--disassemble", "--no-show-raw-insn"}; + return { + FindTool("llvm-objdump"), "--disassemble", "--no-print-imm-hex", "--no-show-raw-insn"}; } } diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index b2d4dcd9f6..672cd3d10f 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -30,10 +30,11 @@ #include "utils/assembler_test_base.h" #include "base/hex_dump.h" +#include "base/macros.h" #include "base/malloc_arena_pool.h" #include "common_runtime_test.h" -namespace art { +namespace art HIDDEN { namespace arm { // Include results file (generated manually) @@ -143,7 +144,6 @@ TEST_F(ArmVIXLAssemblerTest, VixlJniHelpers) { __ Load(scratch_register, FrameOffset(4092), 4); __ Load(scratch_register, FrameOffset(4096), 4); __ LoadRawPtrFromThread(scratch_register, ThreadOffset32(512)); - __ LoadRef(method_register, scratch_register, MemberOffset(128), /* unpoison_reference= */ false); // Stores __ Store(FrameOffset(32), method_register, 4); @@ -153,19 +153,67 @@ TEST_F(ArmVIXLAssemblerTest, VixlJniHelpers) { __ Store(FrameOffset(1024), method_register, 4); __ Store(FrameOffset(4092), scratch_register, 4); __ Store(FrameOffset(4096), scratch_register, 4); - __ StoreImmediateToFrame(FrameOffset(48), 0xFF); - __ StoreImmediateToFrame(FrameOffset(48), 0xFFFFFF); __ StoreRawPtr(FrameOffset(48), scratch_register); - __ StoreRef(FrameOffset(48), scratch_register); - __ StoreSpanning(FrameOffset(48), method_register, FrameOffset(48)); - __ StoreStackOffsetToThread(ThreadOffset32(512), FrameOffset(4096)); - __ StoreStackPointerToThread(ThreadOffset32(512)); + __ StoreStackPointerToThread(ThreadOffset32(512), false); + __ StoreStackPointerToThread(ThreadOffset32(512), true); + + // MoveArguments + static constexpr FrameOffset kInvalidReferenceOffset = + JNIMacroAssembler<kArmPointerSize>::kInvalidReferenceOffset; + static constexpr size_t kNativePointerSize = static_cast<size_t>(kArmPointerSize); + // Normal or @FastNative with parameters (Object, long, long, int, Object). + // Note: This shall not spill the reference R1 to [sp, #36]. The JNI compiler spills + // references in an separate initial pass before moving arguments and creating `jobject`s. 
+ ArgumentLocation move_dests1[] = { + ArgumentLocation(ArmManagedRegister::FromCoreRegister(R2), kNativePointerSize), + ArgumentLocation(FrameOffset(0), 2 * kVRegSize), + ArgumentLocation(FrameOffset(8), 2 * kVRegSize), + ArgumentLocation(FrameOffset(16), kVRegSize), + ArgumentLocation(FrameOffset(20), kNativePointerSize), + }; + ArgumentLocation move_srcs1[] = { + ArgumentLocation(ArmManagedRegister::FromCoreRegister(R1), kVRegSize), + ArgumentLocation(ArmManagedRegister::FromRegisterPair(R2_R3), 2 * kVRegSize), + ArgumentLocation(FrameOffset(48), 2 * kVRegSize), + ArgumentLocation(FrameOffset(56), kVRegSize), + ArgumentLocation(FrameOffset(60), kVRegSize), + }; + FrameOffset move_refs1[] { + FrameOffset(36), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(60), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests1), + ArrayRef<ArgumentLocation>(move_srcs1), + ArrayRef<FrameOffset>(move_refs1)); + // @CriticalNative with parameters (long, long, long, int). + ArgumentLocation move_dests2[] = { + ArgumentLocation(ArmManagedRegister::FromRegisterPair(R0_R1), 2 * kVRegSize), + ArgumentLocation(ArmManagedRegister::FromRegisterPair(R2_R3), 2 * kVRegSize), + ArgumentLocation(FrameOffset(0), 2 * kVRegSize), + ArgumentLocation(FrameOffset(8), kVRegSize), + }; + ArgumentLocation move_srcs2[] = { + ArgumentLocation(ArmManagedRegister::FromRegisterPair(R2_R3), 2 * kVRegSize), + ArgumentLocation(FrameOffset(28), kVRegSize), + ArgumentLocation(FrameOffset(32), 2 * kVRegSize), + ArgumentLocation(FrameOffset(40), kVRegSize), + }; + FrameOffset move_refs2[] { + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests2), + ArrayRef<ArgumentLocation>(move_srcs2), + ArrayRef<FrameOffset>(move_refs2)); // Other __ Call(method_register, FrameOffset(48)); __ Copy(FrameOffset(48), FrameOffset(44), 4); - __ CopyRawPtrFromThread(FrameOffset(44), ThreadOffset32(512)); - __ CopyRef(FrameOffset(48), FrameOffset(44)); __ GetCurrentThread(method_register); __ GetCurrentThread(FrameOffset(48)); __ Move(hidden_arg_register, method_register, 4); @@ -176,7 +224,6 @@ TEST_F(ArmVIXLAssemblerTest, VixlJniHelpers) { __ CreateJObject(high_register, FrameOffset(48), high_register, true); __ CreateJObject(high_register, FrameOffset(48), high_register, false); __ CreateJObject(method_register, FrameOffset(48), high_register, true); - __ CreateJObject(FrameOffset(48), FrameOffset(64), true); __ CreateJObject(method_register, FrameOffset(0), high_register, true); __ CreateJObject(method_register, FrameOffset(1028), high_register, true); __ CreateJObject(high_register, FrameOffset(1028), high_register, true); diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index b6c6025e41..aea7f14762 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -1,258 +1,259 @@ const char* const VixlJniHelpersResults = { - " 0: 2d e9 e0 4d push.w {r5, r6, r7, r8, r10, r11, lr}\n" - " 4: 2d ed 10 8a vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}\n" - " 8: 81 b0 sub sp, #4\n" - " a: 00 90 str r0, [sp]\n" - " c: 19 91 str r1, [sp, #100]\n" - " e: 8d ed 1a 0a vstr s0, [sp, #104]\n" - " 12: 1b 92 str r2, [sp, #108]\n" - " 14: 1c 
93 str r3, [sp, #112]\n" - " 16: 88 b0 sub sp, #32\n" - " 18: ad f5 80 5d sub.w sp, sp, #4096\n" - " 1c: 08 98 ldr r0, [sp, #32]\n" - " 1e: 1f 98 ldr r0, [sp, #124]\n" - " 20: 21 98 ldr r0, [sp, #132]\n" - " 22: ff 98 ldr r0, [sp, #1020]\n" - " 24: dd f8 00 04 ldr.w r0, [sp, #1024]\n" - " 28: dd f8 fc cf ldr.w r12, [sp, #4092]\n" - " 2c: 0d f5 80 5c add.w r12, sp, #4096\n" - " 30: dc f8 00 c0 ldr.w r12, [r12]\n" - " 34: d9 f8 00 c2 ldr.w r12, [r9, #512]\n" - " 38: dc f8 80 00 ldr.w r0, [r12, #128]\n" - " 3c: 08 90 str r0, [sp, #32]\n" - " 3e: 1f 90 str r0, [sp, #124]\n" - " 40: 21 90 str r0, [sp, #132]\n" - " 42: ff 90 str r0, [sp, #1020]\n" - " 44: cd f8 00 04 str.w r0, [sp, #1024]\n" - " 48: cd f8 fc cf str.w r12, [sp, #4092]\n" - " 4c: 4d f8 04 5d str r5, [sp, #-4]!\n" - " 50: 0d f5 80 55 add.w r5, sp, #4096\n" - " 54: c5 f8 04 c0 str.w r12, [r5, #4]\n" - " 58: 5d f8 04 5b ldr r5, [sp], #4\n" - " 5c: 4f f0 ff 0c mov.w r12, #255\n" - " 60: cd f8 30 c0 str.w r12, [sp, #48]\n" - " 64: 6f f0 7f 4c mvn r12, #4278190080\n" - " 68: cd f8 30 c0 str.w r12, [sp, #48]\n" - " 6c: cd f8 30 c0 str.w r12, [sp, #48]\n" - " 70: cd f8 30 c0 str.w r12, [sp, #48]\n" - " 74: 0c 90 str r0, [sp, #48]\n" - " 76: dd f8 30 c0 ldr.w r12, [sp, #48]\n" - " 7a: cd f8 34 c0 str.w r12, [sp, #52]\n" - " 7e: 0d f5 80 5c add.w r12, sp, #4096\n" - " 82: c9 f8 00 c2 str.w r12, [r9, #512]\n" - " 86: c9 f8 00 d2 str.w sp, [r9, #512]\n" - " 8a: d0 f8 30 e0 ldr.w lr, [r0, #48]\n" - " 8e: f0 47 blx lr\n" - " 90: dd f8 2c c0 ldr.w r12, [sp, #44]\n" - " 94: cd f8 30 c0 str.w r12, [sp, #48]\n" - " 98: d9 f8 00 c2 ldr.w r12, [r9, #512]\n" - " 9c: cd f8 2c c0 str.w r12, [sp, #44]\n" - " a0: dd f8 2c c0 ldr.w r12, [sp, #44]\n" - " a4: cd f8 30 c0 str.w r12, [sp, #48]\n" - " a8: 48 46 mov r0, r9\n" - " aa: cd f8 30 90 str.w r9, [sp, #48]\n" - " ae: 04 46 mov r4, r0\n" - " b0: 0d f1 30 0c add.w r12, sp, #48\n" - " b4: bb f1 00 0f cmp.w r11, #0\n" - " b8: 18 bf it ne\n" - " ba: e3 46 movne r11, r12\n" - " bc: 0d f1 30 0b add.w r11, sp, #48\n" - " c0: 5f ea 0b 00 movs.w r0, r11\n" - " c4: 18 bf it ne\n" - " c6: 0c a8 addne r0, sp, #48\n" - " c8: dd f8 40 c0 ldr.w r12, [sp, #64]\n" - " cc: bc f1 00 0f cmp.w r12, #0\n" - " d0: 18 bf it ne\n" - " d2: 0d f1 40 0c addne.w r12, sp, #64\n" - " d6: cd f8 30 c0 str.w r12, [sp, #48]\n" - " da: 5f ea 0b 00 movs.w r0, r11\n" - " de: 18 bf it ne\n" - " e0: 00 a8 addne r0, sp, #0\n" - " e2: 0d f2 04 40 addw r0, sp, #1028\n" - " e6: bb f1 00 0f cmp.w r11, #0\n" - " ea: 08 bf it eq\n" - " ec: 58 46 moveq r0, r11\n" - " ee: 0d f2 04 4c addw r12, sp, #1028\n" - " f2: bb f1 00 0f cmp.w r11, #0\n" - " f6: 18 bf it ne\n" - " f8: e3 46 movne r11, r12\n" - " fa: d9 f8 94 c0 ldr.w r12, [r9, #148]\n" - " fe: bc f1 00 0f cmp.w r12, #0\n" - " 102: 71 d1 bne 0x1e8 @ imm = #226\n" - " 104: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 108: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 10c: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 110: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 114: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 118: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 11c: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 120: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 124: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 128: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 12c: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 130: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 134: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 138: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 13c: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 140: cd f8 ff c7 str.w r12, [sp, 
#2047]\n" - " 144: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 148: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 14c: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 150: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 154: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 158: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 15c: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 160: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 164: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 168: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 16c: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 170: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 174: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 178: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 17c: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 180: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 184: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 188: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 18c: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 190: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 194: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 198: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 19c: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1a0: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1a4: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1a8: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1ac: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1b0: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1b4: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1b8: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1bc: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1c0: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1c4: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1c8: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1cc: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1d0: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1d4: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1d8: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1dc: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1e0: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1e4: 00 f0 02 b8 b.w 0x1ec @ imm = #4\n" - " 1e8: 00 f0 1b b8 b.w 0x222 @ imm = #54\n" - " 1ec: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1f0: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1f4: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1f8: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 1fc: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 200: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 204: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 208: cd f8 ff c7 str.w r12, [sp, #2047]\n" - " 20c: 0d f5 80 5d add.w sp, sp, #4096\n" - " 210: 08 b0 add sp, #32\n" - " 212: 01 b0 add sp, #4\n" - " 214: bd ec 10 8a vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}\n" - " 218: bd e8 e0 4d pop.w {r5, r6, r7, r8, r10, r11, lr}\n" - " 21c: d9 f8 24 80 ldr.w r8, [r9, #36]\n" - " 220: 70 47 bx lr\n" - " 222: d9 f8 94 00 ldr.w r0, [r9, #148]\n" - " 226: d9 f8 c8 e2 ldr.w lr, [r9, #712]\n" - " 22a: f0 47 blx lr\n" + " 0: e92d 4de0 push.w {r5, r6, r7, r8, r10, r11, lr}\n" + " 4: ed2d 8a10 vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}\n" + " 8: b081 sub sp, #4\n" + " a: 9000 str r0, [sp]\n" + " c: 9119 str r1, [sp, #100]\n" + " e: ed8d 0a1a vstr s0, [sp, #104]\n" + " 12: 921b str r2, [sp, #108]\n" + " 14: 931c str r3, [sp, #112]\n" + " 16: b088 sub sp, #32\n" + " 18: f5ad 5d80 sub.w sp, sp, #4096\n" + " 1c: 9808 ldr r0, [sp, #32]\n" + " 1e: 981f ldr r0, [sp, #124]\n" + " 20: 9821 ldr r0, [sp, #132]\n" + " 22: 98ff ldr r0, [sp, #1020]\n" + " 24: f8dd 0400 ldr.w r0, [sp, #1024]\n" + " 28: f8dd cffc ldr.w r12, [sp, #4092]\n" + " 2c: f50d 5c80 add.w r12, sp, #4096\n" + " 30: f8dc c000 ldr.w r12, [r12]\n" + 
" 34: f8d9 c200 ldr.w r12, [r9, #512]\n" + " 38: 9008 str r0, [sp, #32]\n" + " 3a: 901f str r0, [sp, #124]\n" + " 3c: 9021 str r0, [sp, #132]\n" + " 3e: 90ff str r0, [sp, #1020]\n" + " 40: f8cd 0400 str.w r0, [sp, #1024]\n" + " 44: f8cd cffc str.w r12, [sp, #4092]\n" + " 48: f84d 5d04 str r5, [sp, #-4]!\n" + " 4c: f50d 5580 add.w r5, sp, #4096\n" + " 50: f8c5 c004 str.w r12, [r5, #4]\n" + " 54: f85d 5b04 ldr r5, [sp], #4\n" + " 58: f8cd c030 str.w r12, [sp, #48]\n" + " 5c: f8c9 d200 str.w sp, [r9, #512]\n" + " 60: f04d 0c02 orr r12, sp, #2\n" + " 64: f8c9 c200 str.w r12, [r9, #512]\n" + " 68: a909 add r1, sp, #36\n" + " 6a: e9cd 2300 strd r2, r3, [sp]\n" + " 6e: e9dd 020c ldrd r0, r2, [sp, #48]\n" + " 72: e9cd 0202 strd r0, r2, [sp, #8]\n" + " 76: e9dd 020e ldrd r0, r2, [sp, #56]\n" + " 7a: 2a00 cmp r2, #0\n" + " 7c: bf18 it ne\n" + " 7e: aa0f addne r2, sp, #60\n" + " 80: e9cd 0204 strd r0, r2, [sp, #16]\n" + " 84: 460a mov r2, r1\n" + " 86: e9dd 0108 ldrd r0, r1, [sp, #32]\n" + " 8a: e9cd 0100 strd r0, r1, [sp]\n" + " 8e: f8dd c028 ldr.w r12, [sp, #40]\n" + " 92: f8cd c008 str.w r12, [sp, #8]\n" + " 96: 4610 mov r0, r2\n" + " 98: 4619 mov r1, r3\n" + " 9a: 9a07 ldr r2, [sp, #28]\n" + " 9c: 9b08 ldr r3, [sp, #32]\n" + " 9e: f8d0 e030 ldr.w lr, [r0, #48]\n" + " a2: 47f0 blx lr\n" + " a4: f8dd c02c ldr.w r12, [sp, #44]\n" + " a8: f8cd c030 str.w r12, [sp, #48]\n" + " ac: 4648 mov r0, r9\n" + " ae: f8cd 9030 str.w r9, [sp, #48]\n" + " b2: 4604 mov r4, r0\n" + " b4: f10d 0c30 add.w r12, sp, #48\n" + " b8: f1bb 0f00 cmp.w r11, #0\n" + " bc: bf18 it ne\n" + " be: 46e3 movne r11, r12\n" + " c0: f10d 0b30 add.w r11, sp, #48\n" + " c4: ea5f 000b movs.w r0, r11\n" + " c8: bf18 it ne\n" + " ca: a80c addne r0, sp, #48\n" + " cc: ea5f 000b movs.w r0, r11\n" + " d0: bf18 it ne\n" + " d2: a800 addne r0, sp, #0\n" + " d4: f20d 4004 addw r0, sp, #1028\n" + " d8: f1bb 0f00 cmp.w r11, #0\n" + " dc: bf08 it eq\n" + " de: 4658 moveq r0, r11\n" + " e0: f20d 4c04 addw r12, sp, #1028\n" + " e4: f1bb 0f00 cmp.w r11, #0\n" + " e8: bf18 it ne\n" + " ea: 46e3 movne r11, r12\n" + " ec: f8d9 c09c ldr.w r12, [r9, #156]\n" + " f0: f1bc 0f00 cmp.w r12, #0\n" + " f4: d16f bne 0x1d6 @ imm = #222\n" + " f6: f8cd c7ff str.w r12, [sp, #2047]\n" + " fa: f8cd c7ff str.w r12, [sp, #2047]\n" + " fe: f8cd c7ff str.w r12, [sp, #2047]\n" + " 102: f8cd c7ff str.w r12, [sp, #2047]\n" + " 106: f8cd c7ff str.w r12, [sp, #2047]\n" + " 10a: f8cd c7ff str.w r12, [sp, #2047]\n" + " 10e: f8cd c7ff str.w r12, [sp, #2047]\n" + " 112: f8cd c7ff str.w r12, [sp, #2047]\n" + " 116: f8cd c7ff str.w r12, [sp, #2047]\n" + " 11a: f8cd c7ff str.w r12, [sp, #2047]\n" + " 11e: f8cd c7ff str.w r12, [sp, #2047]\n" + " 122: f8cd c7ff str.w r12, [sp, #2047]\n" + " 126: f8cd c7ff str.w r12, [sp, #2047]\n" + " 12a: f8cd c7ff str.w r12, [sp, #2047]\n" + " 12e: f8cd c7ff str.w r12, [sp, #2047]\n" + " 132: f8cd c7ff str.w r12, [sp, #2047]\n" + " 136: f8cd c7ff str.w r12, [sp, #2047]\n" + " 13a: f8cd c7ff str.w r12, [sp, #2047]\n" + " 13e: f8cd c7ff str.w r12, [sp, #2047]\n" + " 142: f8cd c7ff str.w r12, [sp, #2047]\n" + " 146: f8cd c7ff str.w r12, [sp, #2047]\n" + " 14a: f8cd c7ff str.w r12, [sp, #2047]\n" + " 14e: f8cd c7ff str.w r12, [sp, #2047]\n" + " 152: f8cd c7ff str.w r12, [sp, #2047]\n" + " 156: f8cd c7ff str.w r12, [sp, #2047]\n" + " 15a: f8cd c7ff str.w r12, [sp, #2047]\n" + " 15e: f8cd c7ff str.w r12, [sp, #2047]\n" + " 162: f8cd c7ff str.w r12, [sp, #2047]\n" + " 166: f8cd c7ff str.w r12, [sp, #2047]\n" + " 16a: f8cd c7ff str.w r12, [sp, #2047]\n" + " 
16e: f8cd c7ff str.w r12, [sp, #2047]\n" + " 172: f8cd c7ff str.w r12, [sp, #2047]\n" + " 176: f8cd c7ff str.w r12, [sp, #2047]\n" + " 17a: f8cd c7ff str.w r12, [sp, #2047]\n" + " 17e: f8cd c7ff str.w r12, [sp, #2047]\n" + " 182: f8cd c7ff str.w r12, [sp, #2047]\n" + " 186: f8cd c7ff str.w r12, [sp, #2047]\n" + " 18a: f8cd c7ff str.w r12, [sp, #2047]\n" + " 18e: f8cd c7ff str.w r12, [sp, #2047]\n" + " 192: f8cd c7ff str.w r12, [sp, #2047]\n" + " 196: f8cd c7ff str.w r12, [sp, #2047]\n" + " 19a: f8cd c7ff str.w r12, [sp, #2047]\n" + " 19e: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1a2: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1a6: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1aa: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1ae: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1b2: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1b6: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1ba: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1be: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1c2: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1c6: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1ca: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1ce: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1d2: f000 b803 b.w 0x1dc @ imm = #6\n" + " 1d6: f000 b81e b.w 0x216 @ imm = #60\n" + " 1da: 0000 movs r0, r0\n" + " 1dc: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1e0: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1e4: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1e8: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1ec: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1f0: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1f4: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1f8: f8cd c7ff str.w r12, [sp, #2047]\n" + " 1fc: f8cd c7ff str.w r12, [sp, #2047]\n" + " 200: f50d 5d80 add.w sp, sp, #4096\n" + " 204: b008 add sp, #32\n" + " 206: b001 add sp, #4\n" + " 208: ecbd 8a10 vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}\n" + " 20c: e8bd 4de0 pop.w {r5, r6, r7, r8, r10, r11, lr}\n" + " 210: f8d9 8024 ldr.w r8, [r9, #36]\n" + " 214: 4770 bx lr\n" + " 216: f8d9 009c ldr.w r0, [r9, #156]\n" + " 21a: f8d9 e2d0 ldr.w lr, [r9, #720]\n" + " 21e: 47f0 blx lr\n" }; const char* const VixlLoadFromOffsetResults = { - " 0: e2 68 ldr r2, [r4, #12]\n" - " 2: d4 f8 ff 2f ldr.w r2, [r4, #4095]\n" - " 6: 04 f5 80 52 add.w r2, r4, #4096\n" - " a: 12 68 ldr r2, [r2]\n" - " c: 04 f5 80 12 add.w r2, r4, #1048576\n" - " 10: d2 f8 a4 20 ldr.w r2, [r2, #164]\n" - " 14: 4f f4 80 52 mov.w r2, #4096\n" - " 18: c0 f2 10 02 movt r2, #16\n" - " 1c: 22 44 add r2, r4\n" - " 1e: 12 68 ldr r2, [r2]\n" - " 20: 4f f4 80 5c mov.w r12, #4096\n" - " 24: c0 f2 10 0c movt r12, #16\n" - " 28: 64 44 add r4, r12\n" - " 2a: 24 68 ldr r4, [r4]\n" - " 2c: a2 89 ldrh r2, [r4, #12]\n" - " 2e: b4 f8 ff 2f ldrh.w r2, [r4, #4095]\n" - " 32: 04 f5 80 52 add.w r2, r4, #4096\n" - " 36: 12 88 ldrh r2, [r2]\n" - " 38: 04 f5 80 12 add.w r2, r4, #1048576\n" - " 3c: b2 f8 a4 20 ldrh.w r2, [r2, #164]\n" - " 40: 4f f4 80 52 mov.w r2, #4096\n" - " 44: c0 f2 10 02 movt r2, #16\n" - " 48: 22 44 add r2, r4\n" - " 4a: 12 88 ldrh r2, [r2]\n" - " 4c: 4f f4 80 5c mov.w r12, #4096\n" - " 50: c0 f2 10 0c movt r12, #16\n" - " 54: 64 44 add r4, r12\n" - " 56: 24 88 ldrh r4, [r4]\n" - " 58: d4 e9 03 23 ldrd r2, r3, [r4, #12]\n" - " 5c: d4 e9 ff 23 ldrd r2, r3, [r4, #1020]\n" - " 60: 04 f5 80 62 add.w r2, r4, #1024\n" - " 64: d2 e9 00 23 ldrd r2, r3, [r2]\n" - " 68: 04 f5 80 22 add.w r2, r4, #262144\n" - " 6c: d2 e9 29 23 ldrd r2, r3, [r2, #164]\n" - " 70: 4f f4 80 62 mov.w r2, #1024\n" - " 74: c0 f2 04 02 movt r2, #4\n" - " 78: 22 44 add r2, r4\n" - " 7a: d2 e9 00 23 
ldrd r2, r3, [r2]\n" - " 7e: 4f f4 80 6c mov.w r12, #1024\n" - " 82: c0 f2 04 0c movt r12, #4\n" - " 86: 64 44 add r4, r12\n" - " 88: d4 e9 00 45 ldrd r4, r5, [r4]\n" - " 8c: dc f8 0c 00 ldr.w r0, [r12, #12]\n" - " 90: a4 f5 80 12 sub.w r2, r4, #1048576\n" - " 94: d2 f8 a4 20 ldr.w r2, [r2, #164]\n" - " 98: 94 f9 0c 20 ldrsb.w r2, [r4, #12]\n" - " 9c: 22 7b ldrb r2, [r4, #12]\n" - " 9e: b4 f9 0c 20 ldrsh.w r2, [r4, #12]\n" + " 0: 68e2 ldr r2, [r4, #12]\n" + " 2: f8d4 2fff ldr.w r2, [r4, #4095]\n" + " 6: f504 5280 add.w r2, r4, #4096\n" + " a: 6812 ldr r2, [r2]\n" + " c: f504 1280 add.w r2, r4, #1048576\n" + " 10: f8d2 20a4 ldr.w r2, [r2, #164]\n" + " 14: f44f 5280 mov.w r2, #4096\n" + " 18: f2c0 0210 movt r2, #16\n" + " 1c: 4422 add r2, r4\n" + " 1e: 6812 ldr r2, [r2]\n" + " 20: f44f 5c80 mov.w r12, #4096\n" + " 24: f2c0 0c10 movt r12, #16\n" + " 28: 4464 add r4, r12\n" + " 2a: 6824 ldr r4, [r4]\n" + " 2c: 89a2 ldrh r2, [r4, #12]\n" + " 2e: f8b4 2fff ldrh.w r2, [r4, #4095]\n" + " 32: f504 5280 add.w r2, r4, #4096\n" + " 36: 8812 ldrh r2, [r2]\n" + " 38: f504 1280 add.w r2, r4, #1048576\n" + " 3c: f8b2 20a4 ldrh.w r2, [r2, #164]\n" + " 40: f44f 5280 mov.w r2, #4096\n" + " 44: f2c0 0210 movt r2, #16\n" + " 48: 4422 add r2, r4\n" + " 4a: 8812 ldrh r2, [r2]\n" + " 4c: f44f 5c80 mov.w r12, #4096\n" + " 50: f2c0 0c10 movt r12, #16\n" + " 54: 4464 add r4, r12\n" + " 56: 8824 ldrh r4, [r4]\n" + " 58: e9d4 2303 ldrd r2, r3, [r4, #12]\n" + " 5c: e9d4 23ff ldrd r2, r3, [r4, #1020]\n" + " 60: f504 6280 add.w r2, r4, #1024\n" + " 64: e9d2 2300 ldrd r2, r3, [r2]\n" + " 68: f504 2280 add.w r2, r4, #262144\n" + " 6c: e9d2 2329 ldrd r2, r3, [r2, #164]\n" + " 70: f44f 6280 mov.w r2, #1024\n" + " 74: f2c0 0204 movt r2, #4\n" + " 78: 4422 add r2, r4\n" + " 7a: e9d2 2300 ldrd r2, r3, [r2]\n" + " 7e: f44f 6c80 mov.w r12, #1024\n" + " 82: f2c0 0c04 movt r12, #4\n" + " 86: 4464 add r4, r12\n" + " 88: e9d4 4500 ldrd r4, r5, [r4]\n" + " 8c: f8dc 000c ldr.w r0, [r12, #12]\n" + " 90: f5a4 1280 sub.w r2, r4, #1048576\n" + " 94: f8d2 20a4 ldr.w r2, [r2, #164]\n" + " 98: f994 200c ldrsb.w r2, [r4, #12]\n" + " 9c: 7b22 ldrb r2, [r4, #12]\n" + " 9e: f9b4 200c ldrsh.w r2, [r4, #12]\n" }; const char* const VixlStoreToOffsetResults = { - " 0: e2 60 str r2, [r4, #12]\n" - " 2: c4 f8 ff 2f str.w r2, [r4, #4095]\n" - " 6: 04 f5 80 5c add.w r12, r4, #4096\n" - " a: cc f8 00 20 str.w r2, [r12]\n" - " e: 04 f5 80 1c add.w r12, r4, #1048576\n" - " 12: cc f8 a4 20 str.w r2, [r12, #164]\n" - " 16: 4f f4 80 5c mov.w r12, #4096\n" - " 1a: c0 f2 10 0c movt r12, #16\n" - " 1e: a4 44 add r12, r4\n" - " 20: cc f8 00 20 str.w r2, [r12]\n" - " 24: 4f f4 80 5c mov.w r12, #4096\n" - " 28: c0 f2 10 0c movt r12, #16\n" - " 2c: a4 44 add r12, r4\n" - " 2e: cc f8 00 40 str.w r4, [r12]\n" - " 32: a2 81 strh r2, [r4, #12]\n" - " 34: a4 f8 ff 2f strh.w r2, [r4, #4095]\n" - " 38: 04 f5 80 5c add.w r12, r4, #4096\n" - " 3c: ac f8 00 20 strh.w r2, [r12]\n" - " 40: 04 f5 80 1c add.w r12, r4, #1048576\n" - " 44: ac f8 a4 20 strh.w r2, [r12, #164]\n" - " 48: 4f f4 80 5c mov.w r12, #4096\n" - " 4c: c0 f2 10 0c movt r12, #16\n" - " 50: a4 44 add r12, r4\n" - " 52: ac f8 00 20 strh.w r2, [r12]\n" - " 56: 4f f4 80 5c mov.w r12, #4096\n" - " 5a: c0 f2 10 0c movt r12, #16\n" - " 5e: a4 44 add r12, r4\n" - " 60: ac f8 00 40 strh.w r4, [r12]\n" - " 64: c4 e9 03 23 strd r2, r3, [r4, #12]\n" - " 68: c4 e9 ff 23 strd r2, r3, [r4, #1020]\n" - " 6c: 04 f5 80 6c add.w r12, r4, #1024\n" - " 70: cc e9 00 23 strd r2, r3, [r12]\n" - " 74: 04 f5 80 2c add.w r12, r4, 
#262144\n" - " 78: cc e9 29 23 strd r2, r3, [r12, #164]\n" - " 7c: 4f f4 80 6c mov.w r12, #1024\n" - " 80: c0 f2 04 0c movt r12, #4\n" - " 84: a4 44 add r12, r4\n" - " 86: cc e9 00 23 strd r2, r3, [r12]\n" - " 8a: 4f f4 80 6c mov.w r12, #1024\n" - " 8e: c0 f2 04 0c movt r12, #4\n" - " 92: a4 44 add r12, r4\n" - " 94: cc e9 00 45 strd r4, r5, [r12]\n" - " 98: cc f8 0c 00 str.w r0, [r12, #12]\n" - " 9c: a4 f5 80 1c sub.w r12, r4, #1048576\n" - " a0: cc f8 a4 20 str.w r2, [r12, #164]\n" - " a4: 22 73 strb r2, [r4, #12]\n" + " 0: 60e2 str r2, [r4, #12]\n" + " 2: f8c4 2fff str.w r2, [r4, #4095]\n" + " 6: f504 5c80 add.w r12, r4, #4096\n" + " a: f8cc 2000 str.w r2, [r12]\n" + " e: f504 1c80 add.w r12, r4, #1048576\n" + " 12: f8cc 20a4 str.w r2, [r12, #164]\n" + " 16: f44f 5c80 mov.w r12, #4096\n" + " 1a: f2c0 0c10 movt r12, #16\n" + " 1e: 44a4 add r12, r4\n" + " 20: f8cc 2000 str.w r2, [r12]\n" + " 24: f44f 5c80 mov.w r12, #4096\n" + " 28: f2c0 0c10 movt r12, #16\n" + " 2c: 44a4 add r12, r4\n" + " 2e: f8cc 4000 str.w r4, [r12]\n" + " 32: 81a2 strh r2, [r4, #12]\n" + " 34: f8a4 2fff strh.w r2, [r4, #4095]\n" + " 38: f504 5c80 add.w r12, r4, #4096\n" + " 3c: f8ac 2000 strh.w r2, [r12]\n" + " 40: f504 1c80 add.w r12, r4, #1048576\n" + " 44: f8ac 20a4 strh.w r2, [r12, #164]\n" + " 48: f44f 5c80 mov.w r12, #4096\n" + " 4c: f2c0 0c10 movt r12, #16\n" + " 50: 44a4 add r12, r4\n" + " 52: f8ac 2000 strh.w r2, [r12]\n" + " 56: f44f 5c80 mov.w r12, #4096\n" + " 5a: f2c0 0c10 movt r12, #16\n" + " 5e: 44a4 add r12, r4\n" + " 60: f8ac 4000 strh.w r4, [r12]\n" + " 64: e9c4 2303 strd r2, r3, [r4, #12]\n" + " 68: e9c4 23ff strd r2, r3, [r4, #1020]\n" + " 6c: f504 6c80 add.w r12, r4, #1024\n" + " 70: e9cc 2300 strd r2, r3, [r12]\n" + " 74: f504 2c80 add.w r12, r4, #262144\n" + " 78: e9cc 2329 strd r2, r3, [r12, #164]\n" + " 7c: f44f 6c80 mov.w r12, #1024\n" + " 80: f2c0 0c04 movt r12, #4\n" + " 84: 44a4 add r12, r4\n" + " 86: e9cc 2300 strd r2, r3, [r12]\n" + " 8a: f44f 6c80 mov.w r12, #1024\n" + " 8e: f2c0 0c04 movt r12, #4\n" + " 92: 44a4 add r12, r4\n" + " 94: e9cc 4500 strd r4, r5, [r12]\n" + " 98: f8cc 000c str.w r0, [r12, #12]\n" + " 9c: f5a4 1c80 sub.w r12, r4, #1048576\n" + " a0: f8cc 20a4 str.w r2, [r12, #164]\n" + " a4: 7322 strb r2, [r4, #12]\n" }; diff --git a/compiler/utils/atomic_dex_ref_map-inl.h b/compiler/utils/atomic_dex_ref_map-inl.h index 377b7fe352..5f68a7c701 100644 --- a/compiler/utils/atomic_dex_ref_map-inl.h +++ b/compiler/utils/atomic_dex_ref_map-inl.h @@ -21,12 +21,13 @@ #include <type_traits> +#include "base/macros.h" #include "dex/class_reference.h" #include "dex/dex_file-inl.h" #include "dex/method_reference.h" #include "dex/type_reference.h" -namespace art { +namespace art HIDDEN { template <typename DexFileReferenceType, typename Value> inline size_t AtomicDexRefMap<DexFileReferenceType, Value>::NumberOfDexIndices( diff --git a/compiler/utils/atomic_dex_ref_map.h b/compiler/utils/atomic_dex_ref_map.h index a8c285f765..b10fef50c5 100644 --- a/compiler/utils/atomic_dex_ref_map.h +++ b/compiler/utils/atomic_dex_ref_map.h @@ -19,10 +19,11 @@ #include "base/atomic.h" #include "base/dchecked_vector.h" +#include "base/macros.h" #include "base/safe_map.h" #include "dex/dex_file_reference.h" -namespace art { +namespace art HIDDEN { class DexFile; diff --git a/compiler/utils/atomic_dex_ref_map_test.cc b/compiler/utils/atomic_dex_ref_map_test.cc index 864531ed91..329735b796 100644 --- a/compiler/utils/atomic_dex_ref_map_test.cc +++ b/compiler/utils/atomic_dex_ref_map_test.cc @@ -18,12 +18,13 
@@ #include <memory> +#include "base/macros.h" #include "common_runtime_test.h" #include "dex/dex_file-inl.h" #include "dex/method_reference.h" #include "scoped_thread_state_change-inl.h" -namespace art { +namespace art HIDDEN { class AtomicDexRefMapTest : public CommonRuntimeTest {}; diff --git a/compiler/utils/dedupe_set-inl.h b/compiler/utils/dedupe_set-inl.h index d4a9cc829b..db744c53f7 100644 --- a/compiler/utils/dedupe_set-inl.h +++ b/compiler/utils/dedupe_set-inl.h @@ -27,11 +27,12 @@ #include "android-base/stringprintf.h" #include "base/hash_set.h" +#include "base/macros.h" #include "base/mutex.h" #include "base/stl_util.h" #include "base/time_utils.h" -namespace art { +namespace art HIDDEN { template <typename InKey, typename StoreKey, diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h index a1ba208d2c..42db8e3ca0 100644 --- a/compiler/utils/dedupe_set.h +++ b/compiler/utils/dedupe_set.h @@ -23,7 +23,7 @@ #include "base/macros.h" -namespace art { +namespace art HIDDEN { class Thread; diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc index b390508ed4..89385e7c82 100644 --- a/compiler/utils/dedupe_set_test.cc +++ b/compiler/utils/dedupe_set_test.cc @@ -21,11 +21,12 @@ #include <vector> #include "base/array_ref.h" +#include "base/macros.h" #include "dedupe_set-inl.h" #include "gtest/gtest.h" #include "thread-current-inl.h" -namespace art { +namespace art HIDDEN { class DedupeSetTestHashFunc { public: diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc index d6d49f8faa..8b47b38e63 100644 --- a/compiler/utils/jni_macro_assembler.cc +++ b/compiler/utils/jni_macro_assembler.cc @@ -35,7 +35,7 @@ #include "base/globals.h" #include "base/memory_region.h" -namespace art { +namespace art HIDDEN { using MacroAsm32UniquePtr = std::unique_ptr<JNIMacroAssembler<PointerSize::k32>>; @@ -58,6 +58,7 @@ MacroAsm32UniquePtr JNIMacroAssembler<PointerSize::k32>::Create( return MacroAsm32UniquePtr(new (allocator) x86::X86JNIMacroAssembler(allocator)); #endif default: + UNUSED(allocator); LOG(FATAL) << "Unknown/unsupported 4B InstructionSet: " << instruction_set; UNREACHABLE(); } diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h index 7022e3df92..0c729705dc 100644 --- a/compiler/utils/jni_macro_assembler.h +++ b/compiler/utils/jni_macro_assembler.h @@ -30,7 +30,7 @@ #include "managed_register.h" #include "offsets.h" -namespace art { +namespace art HIDDEN { class ArenaAllocator; class DebugFrameOpCodeWriterForAssembler; @@ -118,37 +118,18 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> { // Store routines virtual void Store(FrameOffset offs, ManagedRegister src, size_t size) = 0; virtual void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) = 0; - virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0; virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0; - virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) = 0; - - virtual void StoreStackOffsetToThread(ThreadOffset<kPointerSize> thr_offs, - FrameOffset fr_offs) = 0; - - virtual void StoreStackPointerToThread(ThreadOffset<kPointerSize> thr_offs) = 0; - - virtual void StoreSpanning(FrameOffset dest, - ManagedRegister src, - FrameOffset in_off) = 0; + // Stores stack pointer by tagging it if required so we can walk the stack. In debuggable runtimes + // we use tag to tell if we are using JITed code or AOT code. 
In non-debuggable runtimes we never + // use JITed code when AOT code is present. So checking for AOT code is sufficient to detect which + // code is being executed. We avoid tagging in non-debuggable runtimes to reduce instructions. + virtual void StoreStackPointerToThread(ThreadOffset<kPointerSize> thr_offs, bool tag_sp) = 0; // Load routines virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0; virtual void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) = 0; - virtual void LoadFromThread(ManagedRegister dest, - ThreadOffset<kPointerSize> src, - size_t size) = 0; - - virtual void LoadRef(ManagedRegister dest, FrameOffset src) = 0; - // If unpoison_reference is true and kPoisonReference is true, then we negate the read reference. - virtual void LoadRef(ManagedRegister dest, - ManagedRegister base, - MemberOffset offs, - bool unpoison_reference) = 0; - - virtual void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) = 0; - virtual void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset<kPointerSize> offs) = 0; // Copying routines @@ -165,53 +146,7 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> { virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0; - virtual void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset<kPointerSize> thr_offs) = 0; - - virtual void CopyRawPtrToThread(ThreadOffset<kPointerSize> thr_offs, - FrameOffset fr_offs, - ManagedRegister scratch) = 0; - - virtual void CopyRef(FrameOffset dest, FrameOffset src) = 0; - virtual void CopyRef(FrameOffset dest, - ManagedRegister base, - MemberOffset offs, - bool unpoison_reference) = 0; - - virtual void Copy(FrameOffset dest, FrameOffset src, size_t size) = 0; - - virtual void Copy(FrameOffset dest, - ManagedRegister src_base, - Offset src_offset, - ManagedRegister scratch, - size_t size) = 0; - - virtual void Copy(ManagedRegister dest_base, - Offset dest_offset, - FrameOffset src, - ManagedRegister scratch, - size_t size) = 0; - - virtual void Copy(FrameOffset dest, - FrameOffset src_base, - Offset src_offset, - ManagedRegister scratch, - size_t size) = 0; - - virtual void Copy(ManagedRegister dest, - Offset dest_offset, - ManagedRegister src, - Offset src_offset, - ManagedRegister scratch, - size_t size) = 0; - - virtual void Copy(FrameOffset dest, - Offset dest_offset, - FrameOffset src, - Offset src_offset, - ManagedRegister scratch, - size_t size) = 0; - - virtual void MemoryBarrier(ManagedRegister scratch) = 0; + virtual void Move(ManagedRegister dst, size_t value) = 0; // Sign extension virtual void SignExtend(ManagedRegister mreg, size_t size) = 0; @@ -223,20 +158,10 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> { virtual void GetCurrentThread(ManagedRegister dest) = 0; virtual void GetCurrentThread(FrameOffset dest_offset) = 0; - // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value), - // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly - // stale reference that can be used to avoid loading the spilled value to - // see if the value is null. - virtual void CreateJObject(ManagedRegister out_reg, - FrameOffset spilled_reference_offset, - ManagedRegister in_reg, - bool null_allowed) = 0; - - // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value), - // or to be null if the value is null and `null_allowed`. 
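(For reference, a minimal sketch of the stack-pointer tagging contract behind the new StoreStackPointerToThread(thr_offs, tag_sp) declaration in this hunk. The helper name and free-standing form are illustrative assumptions only; the tag bit value mirrors the 0x2 that the x86 implementation later in this change ORs into the stored value.)

    #include <cstdint>

    // Debuggable runtimes store SP with a low tag bit set so the stack walker can
    // tell JITed code from AOT code; non-debuggable runtimes store the raw SP.
    // Assumes SP is at least 4-byte aligned, so the tag bit is otherwise clear.
    inline uintptr_t TaggedStackPointer(uintptr_t sp, bool tag_sp) {
      constexpr uintptr_t kSpTagBit = 0x2;  // same bit the x86 StoreStackPointerToThread ORs in
      return tag_sp ? (sp | kSpTagBit) : sp;
    }
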
- virtual void CreateJObject(FrameOffset out_off, - FrameOffset spilled_reference_offset, - bool null_allowed) = 0; + // Decode JNI transition or local `jobject`. For (weak) global `jobject`, jump to slow path. + virtual void DecodeJNITransitionOrLocalJObject(ManagedRegister reg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) = 0; // Heap::VerifyObject on src. In some cases (such as a reference to this) we // know that src may not be null. @@ -282,6 +207,8 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> { virtual void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) = 0; + // Emit a conditional jump to label if the loaded value from specified locations is not zero. + virtual void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) = 0; // Code at this offset will serve as the target for the Jump call. virtual void Bind(JNIMacroLabel* label) = 0; diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h index e77177e43e..ac8e7d3010 100644 --- a/compiler/utils/jni_macro_assembler_test.h +++ b/compiler/utils/jni_macro_assembler_test.h @@ -20,6 +20,7 @@ #include "jni_macro_assembler.h" #include "assembler_test_base.h" +#include "base/macros.h" #include "base/malloc_arena_pool.h" #include "common_runtime_test.h" // For ScratchFile @@ -30,7 +31,7 @@ #include <fstream> #include <iterator> -namespace art { +namespace art HIDDEN { template<typename Ass> class JNIMacroAssemblerTest : public AssemblerTestBase { @@ -39,7 +40,7 @@ class JNIMacroAssemblerTest : public AssemblerTestBase { return assembler_.get(); } - typedef std::string (*TestFn)(JNIMacroAssemblerTest* assembler_test, Ass* assembler); + using TestFn = std::string (*)(JNIMacroAssemblerTest *, Ass *); void DriverFn(TestFn f, const std::string& test_name) { DriverWrapper(f(this, assembler_.get()), test_name); diff --git a/compiler/utils/label.h b/compiler/utils/label.h index 282500b1b7..0368d90a26 100644 --- a/compiler/utils/label.h +++ b/compiler/utils/label.h @@ -20,7 +20,9 @@ #include <android-base/logging.h> #include <android-base/macros.h> -namespace art { +#include "base/macros.h" + +namespace art HIDDEN { class Assembler; class AssemblerBuffer; diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h index a3b33ba94d..ba6b46b3b3 100644 --- a/compiler/utils/managed_register.h +++ b/compiler/utils/managed_register.h @@ -20,9 +20,10 @@ #include <type_traits> #include <vector> +#include "base/macros.h" #include "base/value_object.h" -namespace art { +namespace art HIDDEN { namespace arm { class ArmManagedRegister; @@ -31,6 +32,10 @@ namespace arm64 { class Arm64ManagedRegister; } // namespace arm64 +namespace riscv64 { +class Riscv64ManagedRegister; +} // namespace riscv64 + namespace x86 { class X86ManagedRegister; } // namespace x86 @@ -50,6 +55,7 @@ class ManagedRegister : public ValueObject { constexpr arm::ArmManagedRegister AsArm() const; constexpr arm64::Arm64ManagedRegister AsArm64() const; + constexpr riscv64::Riscv64ManagedRegister AsRiscv64() const; constexpr x86::X86ManagedRegister AsX86() const; constexpr x86_64::X86_64ManagedRegister AsX86_64() const; diff --git a/compiler/utils/riscv64/managed_register_riscv64.cc b/compiler/utils/riscv64/managed_register_riscv64.cc new file mode 100644 index 0000000000..560019ae09 --- /dev/null +++ b/compiler/utils/riscv64/managed_register_riscv64.cc @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "managed_register_riscv64.h" + +#include "base/globals.h" + +namespace art { +namespace riscv64 { + +bool Riscv64ManagedRegister::Overlaps(const Riscv64ManagedRegister& other) const { + if (IsNoRegister() || other.IsNoRegister()) { + return false; + } + CHECK(IsValidManagedRegister()); + CHECK(other.IsValidManagedRegister()); + + return Equals(other); +} + +void Riscv64ManagedRegister::Print(std::ostream& os) const { + if (!IsValidManagedRegister()) { + os << "No Register"; + } else if (IsXRegister()) { + os << "XRegister: " << static_cast<int>(AsXRegister()); + } else if (IsFRegister()) { + os << "FRegister: " << static_cast<int>(AsFRegister()); + } else { + os << "??: " << RegId(); + } +} + +std::ostream& operator<<(std::ostream& os, const Riscv64ManagedRegister& reg) { + reg.Print(os); + return os; +} + +} // namespace riscv64 +} // namespace art diff --git a/compiler/utils/riscv64/managed_register_riscv64.h b/compiler/utils/riscv64/managed_register_riscv64.h new file mode 100644 index 0000000000..8e02a9dcc8 --- /dev/null +++ b/compiler/utils/riscv64/managed_register_riscv64.h @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_RISCV64_MANAGED_REGISTER_RISCV64_H_ +#define ART_COMPILER_UTILS_RISCV64_MANAGED_REGISTER_RISCV64_H_ + +#include <android-base/logging.h> + +#include "arch/riscv64/registers_riscv64.h" +#include "base/globals.h" +#include "base/macros.h" +#include "utils/managed_register.h" + +namespace art { +namespace riscv64 { + +const int kNumberOfXRegIds = kNumberOfXRegisters; +const int kNumberOfXAllocIds = kNumberOfXRegisters; + +const int kNumberOfFRegIds = kNumberOfFRegisters; +const int kNumberOfFAllocIds = kNumberOfFRegisters; + +const int kNumberOfRegIds = kNumberOfXRegIds + kNumberOfFRegIds; +const int kNumberOfAllocIds = kNumberOfXAllocIds + kNumberOfFAllocIds; + +// Register ids map: +// [0..R[ core registers (enum XRegister) +// [R..F[ floating-point registers (enum FRegister) +// where +// R = kNumberOfXRegIds +// F = R + kNumberOfFRegIds + +// An instance of class 'ManagedRegister' represents a single Riscv64 register. +// A register can be one of the following: +// * core register (enum XRegister) +// * floating-point register (enum FRegister) +// +// 'ManagedRegister::NoRegister()' provides an invalid register. 
+// There is a one-to-one mapping between ManagedRegister and register id. +class Riscv64ManagedRegister : public ManagedRegister { + public: + constexpr XRegister AsXRegister() const { + CHECK(IsXRegister()); + return static_cast<XRegister>(id_); + } + + constexpr FRegister AsFRegister() const { + CHECK(IsFRegister()); + return static_cast<FRegister>(id_ - kNumberOfXRegIds); + } + + constexpr bool IsXRegister() const { + CHECK(IsValidManagedRegister()); + return (0 <= id_) && (id_ < kNumberOfXRegIds); + } + + constexpr bool IsFRegister() const { + CHECK(IsValidManagedRegister()); + const int test = id_ - kNumberOfXRegIds; + return (0 <= test) && (test < kNumberOfFRegIds); + } + + void Print(std::ostream& os) const; + + // Returns true if the two managed-registers ('this' and 'other') overlap. + // Either managed-register may be the NoRegister. If both are the NoRegister + // then false is returned. + bool Overlaps(const Riscv64ManagedRegister& other) const; + + static constexpr Riscv64ManagedRegister FromXRegister(XRegister r) { + CHECK_NE(r, kNoXRegister); + return FromRegId(r); + } + + static constexpr Riscv64ManagedRegister FromFRegister(FRegister r) { + CHECK_NE(r, kNoFRegister); + return FromRegId(r + kNumberOfXRegIds); + } + + private: + constexpr bool IsValidManagedRegister() const { return (0 <= id_) && (id_ < kNumberOfRegIds); } + + constexpr int RegId() const { + CHECK(!IsNoRegister()); + return id_; + } + + int AllocId() const { + CHECK(IsValidManagedRegister()); + CHECK_LT(id_, kNumberOfAllocIds); + return id_; + } + + int AllocIdLow() const; + int AllocIdHigh() const; + + friend class ManagedRegister; + + explicit constexpr Riscv64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {} + + static constexpr Riscv64ManagedRegister FromRegId(int reg_id) { + Riscv64ManagedRegister reg(reg_id); + CHECK(reg.IsValidManagedRegister()); + return reg; + } +}; + +std::ostream& operator<<(std::ostream& os, const Riscv64ManagedRegister& reg); + +} // namespace riscv64 + +constexpr inline riscv64::Riscv64ManagedRegister ManagedRegister::AsRiscv64() const { + riscv64::Riscv64ManagedRegister reg(id_); + CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister()); + return reg; +} + +} // namespace art + +#endif // ART_COMPILER_UTILS_RISCV64_MANAGED_REGISTER_RISCV64_H_ diff --git a/compiler/utils/riscv64/managed_register_riscv64_test.cc b/compiler/utils/riscv64/managed_register_riscv64_test.cc new file mode 100644 index 0000000000..c6ad2dc38a --- /dev/null +++ b/compiler/utils/riscv64/managed_register_riscv64_test.cc @@ -0,0 +1,204 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "managed_register_riscv64.h" + +#include "base/globals.h" +#include "gtest/gtest.h" + +namespace art { +namespace riscv64 { + +TEST(Riscv64ManagedRegister, NoRegister) { + Riscv64ManagedRegister reg = ManagedRegister::NoRegister().AsRiscv64(); + EXPECT_TRUE(reg.IsNoRegister()); +} + +TEST(Riscv64ManagedRegister, XRegister) { + Riscv64ManagedRegister reg = Riscv64ManagedRegister::FromXRegister(Zero); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsXRegister()); + EXPECT_FALSE(reg.IsFRegister()); + EXPECT_EQ(Zero, reg.AsXRegister()); + + reg = Riscv64ManagedRegister::FromXRegister(RA); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsXRegister()); + EXPECT_FALSE(reg.IsFRegister()); + EXPECT_EQ(RA, reg.AsXRegister()); + + reg = Riscv64ManagedRegister::FromXRegister(SP); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsXRegister()); + EXPECT_FALSE(reg.IsFRegister()); + EXPECT_EQ(SP, reg.AsXRegister()); + + reg = Riscv64ManagedRegister::FromXRegister(GP); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsXRegister()); + EXPECT_FALSE(reg.IsFRegister()); + EXPECT_EQ(GP, reg.AsXRegister()); + + reg = Riscv64ManagedRegister::FromXRegister(T0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsXRegister()); + EXPECT_FALSE(reg.IsFRegister()); + EXPECT_EQ(T0, reg.AsXRegister()); + + reg = Riscv64ManagedRegister::FromXRegister(T2); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsXRegister()); + EXPECT_FALSE(reg.IsFRegister()); + EXPECT_EQ(T2, reg.AsXRegister()); + + reg = Riscv64ManagedRegister::FromXRegister(S0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsXRegister()); + EXPECT_FALSE(reg.IsFRegister()); + EXPECT_EQ(S0, reg.AsXRegister()); + + reg = Riscv64ManagedRegister::FromXRegister(A0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsXRegister()); + EXPECT_FALSE(reg.IsFRegister()); + EXPECT_EQ(A0, reg.AsXRegister()); + + reg = Riscv64ManagedRegister::FromXRegister(A7); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsXRegister()); + EXPECT_FALSE(reg.IsFRegister()); + EXPECT_EQ(A7, reg.AsXRegister()); + + reg = Riscv64ManagedRegister::FromXRegister(S2); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsXRegister()); + EXPECT_FALSE(reg.IsFRegister()); + EXPECT_EQ(S2, reg.AsXRegister()); + + reg = Riscv64ManagedRegister::FromXRegister(T3); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsXRegister()); + EXPECT_FALSE(reg.IsFRegister()); + EXPECT_EQ(T3, reg.AsXRegister()); +} + +TEST(Riscv64ManagedRegister, FRegister) { + Riscv64ManagedRegister reg = Riscv64ManagedRegister::FromFRegister(FT0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsXRegister()); + EXPECT_TRUE(reg.IsFRegister()); + EXPECT_EQ(FT0, reg.AsFRegister()); + EXPECT_TRUE(reg.Equals(Riscv64ManagedRegister::FromFRegister(FT0))); + + reg = Riscv64ManagedRegister::FromFRegister(FT1); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsXRegister()); + EXPECT_TRUE(reg.IsFRegister()); + EXPECT_EQ(FT1, reg.AsFRegister()); + EXPECT_TRUE(reg.Equals(Riscv64ManagedRegister::FromFRegister(FT1))); + + reg = Riscv64ManagedRegister::FromFRegister(FS0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsXRegister()); + EXPECT_TRUE(reg.IsFRegister()); + EXPECT_EQ(FS0, reg.AsFRegister()); + EXPECT_TRUE(reg.Equals(Riscv64ManagedRegister::FromFRegister(FS0))); + + reg = Riscv64ManagedRegister::FromFRegister(FA0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsXRegister()); + EXPECT_TRUE(reg.IsFRegister()); + EXPECT_EQ(FA0, 
reg.AsFRegister()); + EXPECT_TRUE(reg.Equals(Riscv64ManagedRegister::FromFRegister(FA0))); + + reg = Riscv64ManagedRegister::FromFRegister(FA7); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsXRegister()); + EXPECT_TRUE(reg.IsFRegister()); + EXPECT_EQ(FA7, reg.AsFRegister()); + EXPECT_TRUE(reg.Equals(Riscv64ManagedRegister::FromFRegister(FA7))); + + reg = Riscv64ManagedRegister::FromFRegister(FS4); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsXRegister()); + EXPECT_TRUE(reg.IsFRegister()); + EXPECT_EQ(FS4, reg.AsFRegister()); + EXPECT_TRUE(reg.Equals(Riscv64ManagedRegister::FromFRegister(FS4))); + + reg = Riscv64ManagedRegister::FromFRegister(FT11); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsXRegister()); + EXPECT_TRUE(reg.IsFRegister()); + EXPECT_EQ(FT11, reg.AsFRegister()); + EXPECT_TRUE(reg.Equals(Riscv64ManagedRegister::FromFRegister(FT11))); +} + +TEST(Riscv64ManagedRegister, Equals) { + ManagedRegister no_reg = ManagedRegister::NoRegister(); + EXPECT_TRUE(no_reg.Equals(Riscv64ManagedRegister::NoRegister())); + EXPECT_FALSE(no_reg.Equals(Riscv64ManagedRegister::FromXRegister(Zero))); + EXPECT_FALSE(no_reg.Equals(Riscv64ManagedRegister::FromXRegister(A1))); + EXPECT_FALSE(no_reg.Equals(Riscv64ManagedRegister::FromXRegister(S2))); + EXPECT_FALSE(no_reg.Equals(Riscv64ManagedRegister::FromFRegister(FT0))); + EXPECT_FALSE(no_reg.Equals(Riscv64ManagedRegister::FromFRegister(FT11))); + + Riscv64ManagedRegister reg_Zero = Riscv64ManagedRegister::FromXRegister(Zero); + EXPECT_FALSE(reg_Zero.Equals(Riscv64ManagedRegister::NoRegister())); + EXPECT_TRUE(reg_Zero.Equals(Riscv64ManagedRegister::FromXRegister(Zero))); + EXPECT_FALSE(reg_Zero.Equals(Riscv64ManagedRegister::FromXRegister(A1))); + EXPECT_FALSE(reg_Zero.Equals(Riscv64ManagedRegister::FromXRegister(S2))); + EXPECT_FALSE(reg_Zero.Equals(Riscv64ManagedRegister::FromFRegister(FT0))); + EXPECT_FALSE(reg_Zero.Equals(Riscv64ManagedRegister::FromFRegister(FT11))); + + Riscv64ManagedRegister reg_A1 = Riscv64ManagedRegister::FromXRegister(A1); + EXPECT_FALSE(reg_A1.Equals(Riscv64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_A1.Equals(Riscv64ManagedRegister::FromXRegister(Zero))); + EXPECT_FALSE(reg_A1.Equals(Riscv64ManagedRegister::FromXRegister(A0))); + EXPECT_TRUE(reg_A1.Equals(Riscv64ManagedRegister::FromXRegister(A1))); + EXPECT_FALSE(reg_A1.Equals(Riscv64ManagedRegister::FromXRegister(S2))); + EXPECT_FALSE(reg_A1.Equals(Riscv64ManagedRegister::FromFRegister(FT0))); + EXPECT_FALSE(reg_A1.Equals(Riscv64ManagedRegister::FromFRegister(FT11))); + + Riscv64ManagedRegister reg_S2 = Riscv64ManagedRegister::FromXRegister(S2); + EXPECT_FALSE(reg_S2.Equals(Riscv64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_S2.Equals(Riscv64ManagedRegister::FromXRegister(Zero))); + EXPECT_FALSE(reg_S2.Equals(Riscv64ManagedRegister::FromXRegister(A1))); + EXPECT_FALSE(reg_S2.Equals(Riscv64ManagedRegister::FromXRegister(S1))); + EXPECT_TRUE(reg_S2.Equals(Riscv64ManagedRegister::FromXRegister(S2))); + EXPECT_FALSE(reg_S2.Equals(Riscv64ManagedRegister::FromFRegister(FT0))); + EXPECT_FALSE(reg_S2.Equals(Riscv64ManagedRegister::FromFRegister(FT11))); + + Riscv64ManagedRegister reg_F0 = Riscv64ManagedRegister::FromFRegister(FT0); + EXPECT_FALSE(reg_F0.Equals(Riscv64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_F0.Equals(Riscv64ManagedRegister::FromXRegister(Zero))); + EXPECT_FALSE(reg_F0.Equals(Riscv64ManagedRegister::FromXRegister(A1))); + EXPECT_FALSE(reg_F0.Equals(Riscv64ManagedRegister::FromXRegister(S2))); + 
EXPECT_TRUE(reg_F0.Equals(Riscv64ManagedRegister::FromFRegister(FT0))); + EXPECT_FALSE(reg_F0.Equals(Riscv64ManagedRegister::FromFRegister(FT1))); + EXPECT_FALSE(reg_F0.Equals(Riscv64ManagedRegister::FromFRegister(FT11))); + + Riscv64ManagedRegister reg_F31 = Riscv64ManagedRegister::FromFRegister(FT11); + EXPECT_FALSE(reg_F31.Equals(Riscv64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_F31.Equals(Riscv64ManagedRegister::FromXRegister(Zero))); + EXPECT_FALSE(reg_F31.Equals(Riscv64ManagedRegister::FromXRegister(A1))); + EXPECT_FALSE(reg_F31.Equals(Riscv64ManagedRegister::FromXRegister(S2))); + EXPECT_FALSE(reg_F31.Equals(Riscv64ManagedRegister::FromFRegister(FT0))); + EXPECT_FALSE(reg_F31.Equals(Riscv64ManagedRegister::FromFRegister(FT1))); + EXPECT_TRUE(reg_F31.Equals(Riscv64ManagedRegister::FromFRegister(FT11))); +} + +} // namespace riscv64 +} // namespace art diff --git a/compiler/utils/stack_checks.h b/compiler/utils/stack_checks.h index c348f2c8ee..d0fff73df3 100644 --- a/compiler/utils/stack_checks.h +++ b/compiler/utils/stack_checks.h @@ -18,8 +18,9 @@ #define ART_COMPILER_UTILS_STACK_CHECKS_H_ #include "arch/instruction_set.h" +#include "base/macros.h" -namespace art { +namespace art HIDDEN { // Size of a frame that we definitely consider large. Anything larger than this should // definitely get a stack overflow check. diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc deleted file mode 100644 index 6e0773bba4..0000000000 --- a/compiler/utils/swap_space.cc +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "swap_space.h" - -#include <sys/mman.h> - -#include <algorithm> -#include <numeric> - -#include "base/bit_utils.h" -#include "base/macros.h" -#include "base/mutex.h" -#include "thread-current-inl.h" - -namespace art { - -// The chunk size by which the swap file is increased and mapped. 
-static constexpr size_t kMininumMapSize = 16 * MB; - -static constexpr bool kCheckFreeMaps = false; - -template <typename FreeBySizeSet> -static void DumpFreeMap(const FreeBySizeSet& free_by_size) { - size_t last_size = static_cast<size_t>(-1); - for (const auto& entry : free_by_size) { - if (last_size != entry.size) { - last_size = entry.size; - LOG(INFO) << "Size " << last_size; - } - LOG(INFO) << " 0x" << std::hex << entry.free_by_start_entry->Start() - << " size=" << std::dec << entry.free_by_start_entry->size; - } -} - -void SwapSpace::RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) { - auto free_by_start_pos = free_by_size_pos->free_by_start_entry; - free_by_size_.erase(free_by_size_pos); - free_by_start_.erase(free_by_start_pos); -} - -inline void SwapSpace::InsertChunk(const SpaceChunk& chunk) { - DCHECK_NE(chunk.size, 0u); - auto insert_result = free_by_start_.insert(chunk); - DCHECK(insert_result.second); - free_by_size_.emplace(chunk.size, insert_result.first); -} - -SwapSpace::SwapSpace(int fd, size_t initial_size) - : fd_(fd), - size_(0), - lock_("SwapSpace lock", static_cast<LockLevel>(LockLevel::kDefaultMutexLevel - 1)) { - // Assume that the file is unlinked. - - InsertChunk(NewFileChunk(initial_size)); -} - -SwapSpace::~SwapSpace() { - // Unmap all mmapped chunks. Nothing should be allocated anymore at - // this point, so there should be only full size chunks in free_by_start_. - for (const SpaceChunk& chunk : free_by_start_) { - if (munmap(chunk.ptr, chunk.size) != 0) { - PLOG(ERROR) << "Failed to unmap swap space chunk at " - << static_cast<const void*>(chunk.ptr) << " size=" << chunk.size; - } - } - // All arenas are backed by the same file. Just close the descriptor. - close(fd_); -} - -template <typename FreeByStartSet, typename FreeBySizeSet> -static size_t CollectFree(const FreeByStartSet& free_by_start, const FreeBySizeSet& free_by_size) { - if (free_by_start.size() != free_by_size.size()) { - LOG(FATAL) << "Size: " << free_by_start.size() << " vs " << free_by_size.size(); - } - - // Calculate over free_by_size. - size_t sum1 = 0; - for (const auto& entry : free_by_size) { - sum1 += entry.free_by_start_entry->size; - } - - // Calculate over free_by_start. - size_t sum2 = 0; - for (const auto& entry : free_by_start) { - sum2 += entry.size; - } - - if (sum1 != sum2) { - LOG(FATAL) << "Sum: " << sum1 << " vs " << sum2; - } - return sum1; -} - -void* SwapSpace::Alloc(size_t size) { - MutexLock lock(Thread::Current(), lock_); - size = RoundUp(size, 8U); - - // Check the free list for something that fits. - // TODO: Smarter implementation. Global biggest chunk, ... - auto it = free_by_start_.empty() - ? free_by_size_.end() - : free_by_size_.lower_bound(FreeBySizeEntry { size, free_by_start_.begin() }); - if (it != free_by_size_.end()) { - SpaceChunk old_chunk = *it->free_by_start_entry; - if (old_chunk.size == size) { - RemoveChunk(it); - } else { - // Avoid deallocating and allocating the std::set<> nodes. - // This would be much simpler if we could use replace() from Boost.Bimap. - - // The free_by_start_ map contains disjoint intervals ordered by the `ptr`. - // Shrinking the interval does not affect the ordering. - it->free_by_start_entry->ptr += size; - it->free_by_start_entry->size -= size; - - auto node = free_by_size_.extract(it); - node.value().size -= size; - free_by_size_.insert(std::move(node)); - } - return old_chunk.ptr; - } else { - // Not a big enough free chunk, need to increase file size. 
- SpaceChunk new_chunk = NewFileChunk(size); - if (new_chunk.size != size) { - // Insert the remainder. - SpaceChunk remainder = { new_chunk.ptr + size, new_chunk.size - size }; - InsertChunk(remainder); - } - return new_chunk.ptr; - } -} - -SwapSpace::SpaceChunk SwapSpace::NewFileChunk(size_t min_size) { -#if !defined(__APPLE__) - size_t next_part = std::max(RoundUp(min_size, kPageSize), RoundUp(kMininumMapSize, kPageSize)); - int result = TEMP_FAILURE_RETRY(ftruncate64(fd_, size_ + next_part)); - if (result != 0) { - PLOG(FATAL) << "Unable to increase swap file."; - } - uint8_t* ptr = reinterpret_cast<uint8_t*>( - mmap(nullptr, next_part, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, size_)); - if (ptr == MAP_FAILED) { - LOG(ERROR) << "Unable to mmap new swap file chunk."; - LOG(ERROR) << "Current size: " << size_ << " requested: " << next_part << "/" << min_size; - LOG(ERROR) << "Free list:"; - DumpFreeMap(free_by_size_); - LOG(ERROR) << "In free list: " << CollectFree(free_by_start_, free_by_size_); - PLOG(FATAL) << "Unable to mmap new swap file chunk."; - } - size_ += next_part; - SpaceChunk new_chunk = {ptr, next_part}; - return new_chunk; -#else - UNUSED(min_size, kMininumMapSize); - LOG(FATAL) << "No swap file support on the Mac."; - UNREACHABLE(); -#endif -} - -// TODO: Full coalescing. -void SwapSpace::Free(void* ptr, size_t size) { - MutexLock lock(Thread::Current(), lock_); - size = RoundUp(size, 8U); - - size_t free_before = 0; - if (kCheckFreeMaps) { - free_before = CollectFree(free_by_start_, free_by_size_); - } - - SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptr), size }; - auto it = free_by_start_.lower_bound(chunk); - if (it != free_by_start_.begin()) { - auto prev = it; - --prev; - CHECK_LE(prev->End(), chunk.Start()); - if (prev->End() == chunk.Start()) { - // Merge *prev with this chunk. - chunk.size += prev->size; - chunk.ptr -= prev->size; - auto erase_pos = free_by_size_.find(FreeBySizeEntry { prev->size, prev }); - DCHECK(erase_pos != free_by_size_.end()); - RemoveChunk(erase_pos); - // "prev" is invalidated but "it" remains valid. - } - } - if (it != free_by_start_.end()) { - CHECK_LE(chunk.End(), it->Start()); - if (chunk.End() == it->Start()) { - // Merge *it with this chunk. - chunk.size += it->size; - auto erase_pos = free_by_size_.find(FreeBySizeEntry { it->size, it }); - DCHECK(erase_pos != free_by_size_.end()); - RemoveChunk(erase_pos); - // "it" is invalidated but we don't need it anymore. - } - } - InsertChunk(chunk); - - if (kCheckFreeMaps) { - size_t free_after = CollectFree(free_by_start_, free_by_size_); - - if (free_after != free_before + size) { - DumpFreeMap(free_by_size_); - CHECK_EQ(free_after, free_before + size) << "Should be " << size << " difference from " << free_before; - } - } -} - -} // namespace art diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h deleted file mode 100644 index 827e9a6366..0000000000 --- a/compiler/utils/swap_space.h +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_SWAP_SPACE_H_ -#define ART_COMPILER_UTILS_SWAP_SPACE_H_ - -#include <stddef.h> -#include <stdint.h> -#include <cstdlib> -#include <list> -#include <set> -#include <vector> - -#include <android-base/logging.h> - -#include "base/logging.h" -#include "base/macros.h" -#include "base/mutex.h" - -namespace art { - -// An arena pool that creates arenas backed by an mmaped file. -class SwapSpace { - public: - SwapSpace(int fd, size_t initial_size); - ~SwapSpace(); - void* Alloc(size_t size) REQUIRES(!lock_); - void Free(void* ptr, size_t size) REQUIRES(!lock_); - - size_t GetSize() { - return size_; - } - - private: - // Chunk of space. - struct SpaceChunk { - // We need mutable members as we keep these objects in a std::set<> (providing only const - // access) but we modify these members while carefully preserving the std::set<> ordering. - mutable uint8_t* ptr; - mutable size_t size; - - uintptr_t Start() const { - return reinterpret_cast<uintptr_t>(ptr); - } - uintptr_t End() const { - return reinterpret_cast<uintptr_t>(ptr) + size; - } - }; - - class SortChunkByPtr { - public: - bool operator()(const SpaceChunk& a, const SpaceChunk& b) const { - return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr); - } - }; - - using FreeByStartSet = std::set<SpaceChunk, SortChunkByPtr>; - - // Map size to an iterator to free_by_start_'s entry. - struct FreeBySizeEntry { - FreeBySizeEntry(size_t sz, FreeByStartSet::const_iterator entry) - : size(sz), free_by_start_entry(entry) { } - - // We need mutable members as we keep these objects in a std::set<> (providing only const - // access) but we modify these members while carefully preserving the std::set<> ordering. - mutable size_t size; - mutable FreeByStartSet::const_iterator free_by_start_entry; - }; - struct FreeBySizeComparator { - bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) const { - if (lhs.size != rhs.size) { - return lhs.size < rhs.size; - } else { - return lhs.free_by_start_entry->Start() < rhs.free_by_start_entry->Start(); - } - } - }; - using FreeBySizeSet = std::set<FreeBySizeEntry, FreeBySizeComparator>; - - SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_); - - void RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) REQUIRES(lock_); - void InsertChunk(const SpaceChunk& chunk) REQUIRES(lock_); - - int fd_; - size_t size_; - - // NOTE: Boost.Bimap would be useful for the two following members. - - // Map start of a free chunk to its size. - FreeByStartSet free_by_start_ GUARDED_BY(lock_); - // Free chunks ordered by size. 
- FreeBySizeSet free_by_size_ GUARDED_BY(lock_); - - mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - DISALLOW_COPY_AND_ASSIGN(SwapSpace); -}; - -template <typename T> class SwapAllocator; - -template <> -class SwapAllocator<void> { - public: - using value_type = void; - using pointer = void*; - using const_pointer = const void*; - - template <typename U> - struct rebind { - using other = SwapAllocator<U>; - }; - - explicit SwapAllocator(SwapSpace* swap_space) : swap_space_(swap_space) {} - - template <typename U> - SwapAllocator(const SwapAllocator<U>& other) - : swap_space_(other.swap_space_) {} - - SwapAllocator(const SwapAllocator& other) = default; - SwapAllocator& operator=(const SwapAllocator& other) = default; - ~SwapAllocator() = default; - - private: - SwapSpace* swap_space_; - - template <typename U> - friend class SwapAllocator; - - template <typename U> - friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs); -}; - -template <typename T> -class SwapAllocator { - public: - using value_type = T; - using pointer = T*; - using reference = T&; - using const_pointer = const T*; - using const_reference = const T&; - using size_type = size_t; - using difference_type = ptrdiff_t; - - template <typename U> - struct rebind { - using other = SwapAllocator<U>; - }; - - explicit SwapAllocator(SwapSpace* swap_space) : swap_space_(swap_space) {} - - template <typename U> - SwapAllocator(const SwapAllocator<U>& other) - : swap_space_(other.swap_space_) {} - - SwapAllocator(const SwapAllocator& other) = default; - SwapAllocator& operator=(const SwapAllocator& other) = default; - ~SwapAllocator() = default; - - size_type max_size() const { - return static_cast<size_type>(-1) / sizeof(T); - } - - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - - pointer allocate(size_type n, SwapAllocator<void>::pointer hint ATTRIBUTE_UNUSED = nullptr) { - DCHECK_LE(n, max_size()); - if (swap_space_ == nullptr) { - T* result = reinterpret_cast<T*>(malloc(n * sizeof(T))); - CHECK_IMPLIES(result == nullptr, n == 0u); // Abort if malloc() fails. - return result; - } else { - return reinterpret_cast<T*>(swap_space_->Alloc(n * sizeof(T))); - } - } - void deallocate(pointer p, size_type n) { - if (swap_space_ == nullptr) { - free(p); - } else { - swap_space_->Free(p, n * sizeof(T)); - } - } - - void construct(pointer p, const_reference val) { - new (static_cast<void*>(p)) value_type(val); - } - template <class U, class... Args> - void construct(U* p, Args&&... 
args) { - ::new (static_cast<void*>(p)) U(std::forward<Args>(args)...); - } - void destroy(pointer p) { - p->~value_type(); - } - - inline bool operator==(SwapAllocator const& other) { - return swap_space_ == other.swap_space_; - } - inline bool operator!=(SwapAllocator const& other) { - return !operator==(other); - } - - private: - SwapSpace* swap_space_; - - template <typename U> - friend class SwapAllocator; - - template <typename U> - friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs); -}; - -template <typename T> -inline bool operator==(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) { - return lhs.swap_space_ == rhs.swap_space_; -} - -template <typename T> -inline bool operator!=(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) { - return !(lhs == rhs); -} - -template <typename T> -using SwapVector = std::vector<T, SwapAllocator<T>>; -template <typename T, typename Comparator> -using SwapSet = std::set<T, Comparator, SwapAllocator<T>>; - -} // namespace art - -#endif // ART_COMPILER_UTILS_SWAP_SPACE_H_ diff --git a/compiler/utils/swap_space_test.cc b/compiler/utils/swap_space_test.cc deleted file mode 100644 index 1650080e66..0000000000 --- a/compiler/utils/swap_space_test.cc +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "utils/swap_space.h" - -#include <fcntl.h> -#include <sys/stat.h> -#include <sys/types.h> - -#include <cstdio> - -#include "gtest/gtest.h" - -#include "base/os.h" -#include "base/unix_file/fd_file.h" -#include "common_runtime_test.h" - -namespace art { - -class SwapSpaceTest : public CommonRuntimeTest { -}; - -static void SwapTest(bool use_file) { - ScratchFile scratch; - int fd = scratch.GetFd(); - unlink(scratch.GetFilename().c_str()); - - SwapSpace pool(fd, 1 * MB); - SwapAllocator<void> alloc(use_file ? &pool : nullptr); - - SwapVector<int32_t> v(alloc); - v.reserve(1000000); - for (int32_t i = 0; i < 1000000; ++i) { - v.push_back(i); - EXPECT_EQ(i, v[i]); - } - - SwapVector<int32_t> v2(alloc); - v2.reserve(1000000); - for (int32_t i = 0; i < 1000000; ++i) { - v2.push_back(i); - EXPECT_EQ(i, v2[i]); - } - - SwapVector<int32_t> v3(alloc); - v3.reserve(500000); - for (int32_t i = 0; i < 1000000; ++i) { - v3.push_back(i); - EXPECT_EQ(i, v2[i]); - } - - // Verify contents. 
- for (int32_t i = 0; i < 1000000; ++i) { - EXPECT_EQ(i, v[i]); - EXPECT_EQ(i, v2[i]); - EXPECT_EQ(i, v3[i]); - } - - scratch.Close(); -} - -TEST_F(SwapSpaceTest, Memory) { - SwapTest(false); -} - -TEST_F(SwapSpaceTest, Swap) { - SwapTest(true); -} - -} // namespace art diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 861b27e6af..a6b90114b2 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -21,7 +21,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "thread.h" -namespace art { +namespace art HIDDEN { namespace x86 { std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) { diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index c346ba9235..0f7854dc5c 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -32,7 +32,7 @@ #include "offsets.h" #include "utils/assembler.h" -namespace art { +namespace art HIDDEN { namespace x86 { class Immediate : public ValueObject { diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index 89c73c0ade..5da6f04402 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -17,11 +17,12 @@ #include "assembler_x86.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "base/malloc_arena_pool.h" #include "base/stl_util.h" #include "utils/assembler_test.h" -namespace art { +namespace art HIDDEN { TEST(AssemblerX86, CreateBuffer) { MallocArenaPool pool; diff --git a/compiler/utils/x86/constants_x86.h b/compiler/utils/x86/constants_x86.h index 477b915bb9..0c0a7d4133 100644 --- a/compiler/utils/x86/constants_x86.h +++ b/compiler/utils/x86/constants_x86.h @@ -25,7 +25,7 @@ #include "base/globals.h" #include "base/macros.h" -namespace art { +namespace art HIDDEN { namespace x86 { enum ByteRegister { diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc index 685f5f1b48..154e50b4e4 100644 --- a/compiler/utils/x86/jni_macro_assembler_x86.cc +++ b/compiler/utils/x86/jni_macro_assembler_x86.cc @@ -18,11 +18,12 @@ #include "base/casts.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "indirect_reference_table.h" #include "lock_word.h" #include "thread.h" #include "utils/assembler.h" -namespace art { +namespace art HIDDEN { namespace x86 { static Register GetScratchRegister() { @@ -165,36 +166,24 @@ void X86JNIMacroAssembler::Store(ManagedRegister mbase, } } -void X86JNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) { - X86ManagedRegister src = msrc.AsX86(); - CHECK(src.IsCpuRegister()); - __ movl(Address(ESP, dest), src.AsCpuRegister()); -} - void X86JNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) { X86ManagedRegister src = msrc.AsX86(); CHECK(src.IsCpuRegister()); __ movl(Address(ESP, dest), src.AsCpuRegister()); } -void X86JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm) { - __ movl(Address(ESP, dest), Immediate(imm)); -} - -void X86JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) { - Register scratch = GetScratchRegister(); - __ leal(scratch, Address(ESP, fr_offs)); - __ fs()->movl(Address::Absolute(thr_offs), scratch); -} - -void X86JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) { - __ fs()->movl(Address::Absolute(thr_offs), ESP); -} - -void X86JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/, 
- ManagedRegister /*src*/, - FrameOffset /*in_off*/) { - UNIMPLEMENTED(FATAL); // this case only currently exists for ARM +void X86JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs, bool tag_sp) { + if (tag_sp) { + // There is no free register, store contents onto stack and restore back later. + Register scratch = ECX; + __ movl(Address(ESP, -32), scratch); + __ movl(scratch, ESP); + __ orl(scratch, Immediate(0x2)); + __ fs()->movl(Address::Absolute(thr_offs), scratch); + __ movl(scratch, Address(ESP, -32)); + } else { + __ fs()->movl(Address::Absolute(thr_offs), ESP); + } } void X86JNIMacroAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) { @@ -233,61 +222,6 @@ void X86JNIMacroAssembler::Load(ManagedRegister mdest, } } -void X86JNIMacroAssembler::LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) { - X86ManagedRegister dest = mdest.AsX86(); - if (dest.IsNoRegister()) { - CHECK_EQ(0u, size); - } else if (dest.IsCpuRegister()) { - if (size == 1u) { - __ fs()->movzxb(dest.AsCpuRegister(), Address::Absolute(src)); - } else { - CHECK_EQ(4u, size); - __ fs()->movl(dest.AsCpuRegister(), Address::Absolute(src)); - } - } else if (dest.IsRegisterPair()) { - CHECK_EQ(8u, size); - __ fs()->movl(dest.AsRegisterPairLow(), Address::Absolute(src)); - __ fs()->movl(dest.AsRegisterPairHigh(), Address::Absolute(ThreadOffset32(src.Int32Value()+4))); - } else if (dest.IsX87Register()) { - if (size == 4) { - __ fs()->flds(Address::Absolute(src)); - } else { - __ fs()->fldl(Address::Absolute(src)); - } - } else { - CHECK(dest.IsXmmRegister()); - if (size == 4) { - __ fs()->movss(dest.AsXmmRegister(), Address::Absolute(src)); - } else { - __ fs()->movsd(dest.AsXmmRegister(), Address::Absolute(src)); - } - } -} - -void X86JNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) { - X86ManagedRegister dest = mdest.AsX86(); - CHECK(dest.IsCpuRegister()); - __ movl(dest.AsCpuRegister(), Address(ESP, src)); -} - -void X86JNIMacroAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) { - X86ManagedRegister dest = mdest.AsX86(); - CHECK(dest.IsCpuRegister() && dest.IsCpuRegister()); - __ movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs)); - if (unpoison_reference) { - __ MaybeUnpoisonHeapReference(dest.AsCpuRegister()); - } -} - -void X86JNIMacroAssembler::LoadRawPtr(ManagedRegister mdest, - ManagedRegister base, - Offset offs) { - X86ManagedRegister dest = mdest.AsX86(); - CHECK(dest.IsCpuRegister() && dest.IsCpuRegister()); - __ movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs)); -} - void X86JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) { X86ManagedRegister dest = mdest.AsX86(); CHECK(dest.IsCpuRegister()); @@ -402,37 +336,9 @@ void X86JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, siz } } -void X86JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) { - Register scratch = GetScratchRegister(); - __ movl(scratch, Address(ESP, src)); - __ movl(Address(ESP, dest), scratch); -} - -void X86JNIMacroAssembler::CopyRef(FrameOffset dest, - ManagedRegister base, - MemberOffset offs, - bool unpoison_reference) { - Register scratch = GetScratchRegister(); - __ movl(scratch, Address(base.AsX86().AsCpuRegister(), offs)); - if (unpoison_reference) { - __ MaybeUnpoisonHeapReference(scratch); - } - __ movl(Address(ESP, dest), scratch); -} - -void X86JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset 
fr_offs, ThreadOffset32 thr_offs) { - Register scratch = GetScratchRegister(); - __ fs()->movl(scratch, Address::Absolute(thr_offs)); - __ movl(Address(ESP, fr_offs), scratch); -} - -void X86JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) { - X86ManagedRegister scratch = mscratch.AsX86(); - CHECK(scratch.IsCpuRegister()); - Load(scratch, fr_offs, 4); - __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister()); +void X86JNIMacroAssembler::Move(ManagedRegister mdest, size_t value) { + X86ManagedRegister dest = mdest.AsX86(); + __ movl(dest.AsCpuRegister(), Immediate(value)); } void X86JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) { @@ -446,67 +352,6 @@ void X86JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) } } -void X86JNIMacroAssembler::Copy(FrameOffset /*dst*/, - ManagedRegister /*src_base*/, - Offset /*src_offset*/, - ManagedRegister /*scratch*/, - size_t /*size*/) { - UNIMPLEMENTED(FATAL); -} - -void X86JNIMacroAssembler::Copy(ManagedRegister dest_base, - Offset dest_offset, - FrameOffset src, - ManagedRegister scratch, - size_t size) { - CHECK(scratch.IsNoRegister()); - CHECK_EQ(size, 4u); - __ pushl(Address(ESP, src)); - __ popl(Address(dest_base.AsX86().AsCpuRegister(), dest_offset)); -} - -void X86JNIMacroAssembler::Copy(FrameOffset dest, - FrameOffset src_base, - Offset src_offset, - ManagedRegister mscratch, - size_t size) { - Register scratch = mscratch.AsX86().AsCpuRegister(); - CHECK_EQ(size, 4u); - __ movl(scratch, Address(ESP, src_base)); - __ movl(scratch, Address(scratch, src_offset)); - __ movl(Address(ESP, dest), scratch); -} - -void X86JNIMacroAssembler::Copy(ManagedRegister dest, - Offset dest_offset, - ManagedRegister src, - Offset src_offset, - ManagedRegister scratch, - size_t size) { - CHECK_EQ(size, 4u); - CHECK(scratch.IsNoRegister()); - __ pushl(Address(src.AsX86().AsCpuRegister(), src_offset)); - __ popl(Address(dest.AsX86().AsCpuRegister(), dest_offset)); -} - -void X86JNIMacroAssembler::Copy(FrameOffset dest, - Offset dest_offset, - FrameOffset src, - Offset src_offset, - ManagedRegister mscratch, - size_t size) { - Register scratch = mscratch.AsX86().AsCpuRegister(); - CHECK_EQ(size, 4u); - CHECK_EQ(dest.Int32Value(), src.Int32Value()); - __ movl(scratch, Address(ESP, src)); - __ pushl(Address(scratch, src_offset)); - __ popl(Address(scratch, dest_offset)); -} - -void X86JNIMacroAssembler::MemoryBarrier(ManagedRegister) { - __ mfence(); -} - void X86JNIMacroAssembler::CreateJObject(ManagedRegister mout_reg, FrameOffset spilled_reference_offset, ManagedRegister min_reg, @@ -547,6 +392,20 @@ void X86JNIMacroAssembler::CreateJObject(FrameOffset out_off, __ movl(Address(ESP, out_off), scratch); } +void X86JNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister reg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) { + constexpr uint32_t kGlobalOrWeakGlobalMask = + dchecked_integral_cast<uint32_t>(IndirectReferenceTable::GetGlobalOrWeakGlobalMask()); + constexpr uint32_t kIndirectRefKindMask = + dchecked_integral_cast<uint32_t>(IndirectReferenceTable::GetIndirectRefKindMask()); + __ testl(reg.AsX86().AsCpuRegister(), Immediate(kGlobalOrWeakGlobalMask)); + __ j(kNotZero, X86JNIMacroLabel::Cast(slow_path)->AsX86()); + __ andl(reg.AsX86().AsCpuRegister(), Immediate(~kIndirectRefKindMask)); + __ j(kZero, X86JNIMacroLabel::Cast(resume)->AsX86()); // Skip load for null. 
+ __ movl(reg.AsX86().AsCpuRegister(), Address(reg.AsX86().AsCpuRegister(), /*disp=*/ 0)); +} + void X86JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { // TODO: not validating references } @@ -724,6 +583,12 @@ void X86JNIMacroAssembler::TestMarkBit(ManagedRegister mref, __ j(UnaryConditionToX86Condition(cond), X86JNIMacroLabel::Cast(label)->AsX86()); } + +void X86JNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) { + __ cmpb(Address::Absolute(address), Immediate(0)); + __ j(kNotZero, X86JNIMacroLabel::Cast(label)->AsX86()); +} + void X86JNIMacroAssembler::Bind(JNIMacroLabel* label) { CHECK(label != nullptr); __ Bind(X86JNIMacroLabel::Cast(label)->AsX86()); diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h index 29fccfd386..6b177f533b 100644 --- a/compiler/utils/x86/jni_macro_assembler_x86.h +++ b/compiler/utils/x86/jni_macro_assembler_x86.h @@ -27,7 +27,7 @@ #include "offsets.h" #include "utils/jni_macro_assembler.h" -namespace art { +namespace art HIDDEN { namespace x86 { class X86JNIMacroLabel; @@ -59,30 +59,14 @@ class X86JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86Assembler, Poi // Store routines void Store(FrameOffset offs, ManagedRegister src, size_t size) override; void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override; - void StoreRef(FrameOffset dest, ManagedRegister src) override; void StoreRawPtr(FrameOffset dest, ManagedRegister src) override; - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override; - - void StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) override; - - void StoreStackPointerToThread(ThreadOffset32 thr_offs) override; - - void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override; + void StoreStackPointerToThread(ThreadOffset32 thr_offs, bool tag_sp) override; // Load routines void Load(ManagedRegister dest, FrameOffset src, size_t size) override; void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override; - void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) override; - - void LoadRef(ManagedRegister dest, FrameOffset src) override; - - void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) override; - - void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) override; - void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) override; // Copying routines @@ -92,35 +76,7 @@ class X86JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86Assembler, Poi void Move(ManagedRegister dest, ManagedRegister src, size_t size) override; - void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) override; - - void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch) - override; - - void CopyRef(FrameOffset dest, FrameOffset src) override; - void CopyRef(FrameOffset dest, - ManagedRegister base, - MemberOffset offs, - bool unpoison_reference) override; - - void Copy(FrameOffset dest, FrameOffset src, size_t size) override; - - void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch, - size_t size) override; - - void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch, - size_t size) override; - - void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, 
ManagedRegister scratch, - size_t size) override; - - void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, - ManagedRegister scratch, size_t size) override; - - void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, - ManagedRegister scratch, size_t size) override; - - void MemoryBarrier(ManagedRegister) override; + void Move(ManagedRegister dest, size_t value) override; // Sign extension void SignExtend(ManagedRegister mreg, size_t size) override; @@ -132,20 +88,10 @@ class X86JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86Assembler, Poi void GetCurrentThread(ManagedRegister dest) override; void GetCurrentThread(FrameOffset dest_offset) override; - // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value), - // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly - // stale reference that can be used to avoid loading the spilled value to - // see if the value is null. - void CreateJObject(ManagedRegister out_reg, - FrameOffset spilled_reference_offset, - ManagedRegister in_reg, - bool null_allowed) override; - - // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value), - // or to be null if the value is null and `null_allowed`. - void CreateJObject(FrameOffset out_off, - FrameOffset spilled_reference_offset, - bool null_allowed) override; + // Decode JNI transition or local `jobject`. For (weak) global `jobject`, jump to slow path. + void DecodeJNITransitionOrLocalJObject(ManagedRegister reg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) override; // Heap::VerifyObject on src. In some cases (such as a reference to this) we // know that src may not be null. @@ -189,10 +135,29 @@ class X86JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86Assembler, Poi void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override; // Emit a conditional jump to the label by applying a unary condition test to object's mark bit. void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override; + // Emit a conditional jump to label if the loaded value from specified locations is not zero. + void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override; // Code at this offset will serve as the target for the Jump call. void Bind(JNIMacroLabel* label) override; private: + void Copy(FrameOffset dest, FrameOffset src, size_t size); + + // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value), + // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly + // stale reference that can be used to avoid loading the spilled value to + // see if the value is null. + void CreateJObject(ManagedRegister out_reg, + FrameOffset spilled_reference_offset, + ManagedRegister in_reg, + bool null_allowed); + + // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value), + // or to be null if the value is null and `null_allowed`. 
+ void CreateJObject(FrameOffset out_off, + FrameOffset spilled_reference_offset, + bool null_allowed); + DISALLOW_COPY_AND_ASSIGN(X86JNIMacroAssembler); }; diff --git a/compiler/utils/x86/managed_register_x86.cc b/compiler/utils/x86/managed_register_x86.cc index cc7cedf93e..bef948056f 100644 --- a/compiler/utils/x86/managed_register_x86.cc +++ b/compiler/utils/x86/managed_register_x86.cc @@ -18,7 +18,7 @@ #include "base/globals.h" -namespace art { +namespace art HIDDEN { namespace x86 { // Define register pairs. diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h index 27555bfd32..def4f68b27 100644 --- a/compiler/utils/x86/managed_register_x86.h +++ b/compiler/utils/x86/managed_register_x86.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_UTILS_X86_MANAGED_REGISTER_X86_H_ #define ART_COMPILER_UTILS_X86_MANAGED_REGISTER_X86_H_ +#include "base/macros.h" #include "constants_x86.h" #include "utils/managed_register.h" -namespace art { +namespace art HIDDEN { namespace x86 { // Values for register pairs. diff --git a/compiler/utils/x86/managed_register_x86_test.cc b/compiler/utils/x86/managed_register_x86_test.cc index 28af5313c7..9f5e1970ac 100644 --- a/compiler/utils/x86/managed_register_x86_test.cc +++ b/compiler/utils/x86/managed_register_x86_test.cc @@ -17,9 +17,10 @@ #include "managed_register_x86.h" #include "base/globals.h" +#include "base/macros.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { namespace x86 { TEST(X86ManagedRegister, NoRegister) { diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 21a44810ba..3fdf05bed9 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -21,7 +21,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "thread.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) { diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index ea944c200e..235ea03e2b 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -30,9 +30,8 @@ #include "managed_register_x86_64.h" #include "offsets.h" #include "utils/assembler.h" -#include "utils/jni_macro_assembler.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { // Encodes an immediate value for operands. 
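The recurring `-namespace art {` / `+namespace art HIDDEN {` hunks above, together with the `#include "base/macros.h"` additions that evidently supply the macro, annotate the namespace itself rather than individual declarations. A minimal sketch of the assumed expansion follows; the real definition of `HIDDEN` lives in `base/macros.h` and is not part of this diff, and the `CpuRegister` forward declaration is only an illustration:

    // Assumed expansion of HIDDEN; GCC/Clang accept visibility attributes on namespaces.
    #define HIDDEN __attribute__((visibility("hidden")))

    namespace art HIDDEN {   // every symbol declared inside defaults to hidden visibility
    namespace x86_64 {

    class CpuRegister;       // stays internal to the shared library unless re-exported

    }  // namespace x86_64
    }  // namespace art

With hidden visibility as the default, these compiler internals stay out of the dynamic symbol table, and only entry points that are deliberately exported remain visible to other shared objects.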
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index f7e890d112..a7c206afaa 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -21,13 +21,14 @@ #include <random> #include "base/bit_utils.h" +#include "base/macros.h" #include "base/malloc_arena_pool.h" #include "base/stl_util.h" #include "jni_macro_assembler_x86_64.h" #include "utils/assembler_test.h" #include "utils/jni_macro_assembler_test.h" -namespace art { +namespace art HIDDEN { TEST(AssemblerX86_64, CreateBuffer) { MallocArenaPool pool; diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h index 301c8fc09b..52ac987766 100644 --- a/compiler/utils/x86_64/constants_x86_64.h +++ b/compiler/utils/x86_64/constants_x86_64.h @@ -25,7 +25,7 @@ #include "base/globals.h" #include "base/macros.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { class CpuRegister { diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc index d5d1bbadc9..388845730e 100644 --- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc +++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc @@ -19,10 +19,11 @@ #include "base/casts.h" #include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "indirect_reference_table.h" #include "lock_word.h" #include "thread.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { static dwarf::Reg DWARFReg(Register reg) { @@ -194,37 +195,21 @@ void X86_64JNIMacroAssembler::Store(ManagedRegister mbase, } } -void X86_64JNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) { - X86_64ManagedRegister src = msrc.AsX86_64(); - CHECK(src.IsCpuRegister()); - __ movl(Address(CpuRegister(RSP), dest), src.AsCpuRegister()); -} - void X86_64JNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) { X86_64ManagedRegister src = msrc.AsX86_64(); CHECK(src.IsCpuRegister()); __ movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister()); } -void X86_64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm) { - __ movl(Address(CpuRegister(RSP), dest), Immediate(imm)); // TODO(64) movq? 
-} - -void X86_64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 thr_offs, - FrameOffset fr_offs) { - CpuRegister scratch = GetScratchRegister(); - __ leaq(scratch, Address(CpuRegister(RSP), fr_offs)); - __ gs()->movq(Address::Absolute(thr_offs, true), scratch); -} - -void X86_64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) { - __ gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP)); -} - -void X86_64JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/, - ManagedRegister /*src*/, - FrameOffset /*in_off*/) { - UNIMPLEMENTED(FATAL); // this case only currently exists for ARM +void X86_64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 thr_offs, bool tag_sp) { + if (tag_sp) { + CpuRegister reg = GetScratchRegister(); + __ movq(reg, CpuRegister(RSP)); + __ orq(reg, Immediate(0x2)); + __ gs()->movq(Address::Absolute(thr_offs, true), reg); + } else { + __ gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP)); + } } void X86_64JNIMacroAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) { @@ -263,67 +248,6 @@ void X86_64JNIMacroAssembler::Load(ManagedRegister mdest, } } -void X86_64JNIMacroAssembler::LoadFromThread(ManagedRegister mdest, - ThreadOffset64 src, size_t size) { - X86_64ManagedRegister dest = mdest.AsX86_64(); - if (dest.IsNoRegister()) { - CHECK_EQ(0u, size); - } else if (dest.IsCpuRegister()) { - if (size == 1u) { - __ gs()->movzxb(dest.AsCpuRegister(), Address::Absolute(src, true)); - } else { - CHECK_EQ(4u, size); - __ gs()->movl(dest.AsCpuRegister(), Address::Absolute(src, true)); - } - } else if (dest.IsRegisterPair()) { - CHECK_EQ(8u, size); - __ gs()->movq(dest.AsRegisterPairLow(), Address::Absolute(src, true)); - } else if (dest.IsX87Register()) { - if (size == 4) { - __ gs()->flds(Address::Absolute(src, true)); - } else { - __ gs()->fldl(Address::Absolute(src, true)); - } - } else { - CHECK(dest.IsXmmRegister()); - if (size == 4) { - __ gs()->movss(dest.AsXmmRegister(), Address::Absolute(src, true)); - } else { - __ gs()->movsd(dest.AsXmmRegister(), Address::Absolute(src, true)); - } - } -} - -void X86_64JNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) { - X86_64ManagedRegister dest = mdest.AsX86_64(); - CHECK(dest.IsCpuRegister()); - __ movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src)); -} - -void X86_64JNIMacroAssembler::LoadRef(ManagedRegister mdest, - ManagedRegister mbase, - MemberOffset offs, - bool unpoison_reference) { - X86_64ManagedRegister base = mbase.AsX86_64(); - X86_64ManagedRegister dest = mdest.AsX86_64(); - CHECK(base.IsCpuRegister()); - CHECK(dest.IsCpuRegister()); - __ movl(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs)); - if (unpoison_reference) { - __ MaybeUnpoisonHeapReference(dest.AsCpuRegister()); - } -} - -void X86_64JNIMacroAssembler::LoadRawPtr(ManagedRegister mdest, - ManagedRegister mbase, - Offset offs) { - X86_64ManagedRegister base = mbase.AsX86_64(); - X86_64ManagedRegister dest = mdest.AsX86_64(); - CHECK(base.IsCpuRegister()); - CHECK(dest.IsCpuRegister()); - __ movq(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs)); -} - void X86_64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) { X86_64ManagedRegister dest = mdest.AsX86_64(); CHECK(dest.IsCpuRegister()); @@ -477,37 +401,10 @@ void X86_64JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, } } -void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) { - CpuRegister scratch = 
GetScratchRegister(); - __ movl(scratch, Address(CpuRegister(RSP), src)); - __ movl(Address(CpuRegister(RSP), dest), scratch); -} - -void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest, - ManagedRegister base, - MemberOffset offs, - bool unpoison_reference) { - CpuRegister scratch = GetScratchRegister(); - __ movl(scratch, Address(base.AsX86_64().AsCpuRegister(), offs)); - if (unpoison_reference) { - __ MaybeUnpoisonHeapReference(scratch); - } - __ movl(Address(CpuRegister(RSP), dest), scratch); -} - -void X86_64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) { - CpuRegister scratch = GetScratchRegister(); - __ gs()->movq(scratch, Address::Absolute(thr_offs, true)); - __ movq(Address(CpuRegister(RSP), fr_offs), scratch); -} -void X86_64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) { - X86_64ManagedRegister scratch = mscratch.AsX86_64(); - CHECK(scratch.IsCpuRegister()); - Load(scratch, fr_offs, 8); - __ gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister()); +void X86_64JNIMacroAssembler::Move(ManagedRegister mdest, size_t value) { + X86_64ManagedRegister dest = mdest.AsX86_64(); + __ movq(dest.AsCpuRegister(), Immediate(value)); } void X86_64JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) { @@ -522,67 +419,6 @@ void X86_64JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t siz } } -void X86_64JNIMacroAssembler::Copy(FrameOffset /*dst*/, - ManagedRegister /*src_base*/, - Offset /*src_offset*/, - ManagedRegister /*scratch*/, - size_t /*size*/) { - UNIMPLEMENTED(FATAL); -} - -void X86_64JNIMacroAssembler::Copy(ManagedRegister dest_base, - Offset dest_offset, - FrameOffset src, - ManagedRegister scratch, - size_t size) { - CHECK(scratch.IsNoRegister()); - CHECK_EQ(size, 4u); - __ pushq(Address(CpuRegister(RSP), src)); - __ popq(Address(dest_base.AsX86_64().AsCpuRegister(), dest_offset)); -} - -void X86_64JNIMacroAssembler::Copy(FrameOffset dest, - FrameOffset src_base, - Offset src_offset, - ManagedRegister mscratch, - size_t size) { - CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister(); - CHECK_EQ(size, 4u); - __ movq(scratch, Address(CpuRegister(RSP), src_base)); - __ movq(scratch, Address(scratch, src_offset)); - __ movq(Address(CpuRegister(RSP), dest), scratch); -} - -void X86_64JNIMacroAssembler::Copy(ManagedRegister dest, - Offset dest_offset, - ManagedRegister src, - Offset src_offset, - ManagedRegister scratch, - size_t size) { - CHECK_EQ(size, 4u); - CHECK(scratch.IsNoRegister()); - __ pushq(Address(src.AsX86_64().AsCpuRegister(), src_offset)); - __ popq(Address(dest.AsX86_64().AsCpuRegister(), dest_offset)); -} - -void X86_64JNIMacroAssembler::Copy(FrameOffset dest, - Offset dest_offset, - FrameOffset src, - Offset src_offset, - ManagedRegister mscratch, - size_t size) { - CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister(); - CHECK_EQ(size, 4u); - CHECK_EQ(dest.Int32Value(), src.Int32Value()); - __ movq(scratch, Address(CpuRegister(RSP), src)); - __ pushq(Address(scratch, src_offset)); - __ popq(Address(scratch, dest_offset)); -} - -void X86_64JNIMacroAssembler::MemoryBarrier(ManagedRegister) { - __ mfence(); -} - void X86_64JNIMacroAssembler::CreateJObject(ManagedRegister mout_reg, FrameOffset spilled_reference_offset, ManagedRegister min_reg, @@ -629,6 +465,19 @@ void X86_64JNIMacroAssembler::CreateJObject(FrameOffset out_off, __ movq(Address(CpuRegister(RSP), out_off), scratch); } +void 
X86_64JNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister reg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) { + constexpr uint64_t kGlobalOrWeakGlobalMask = IndirectReferenceTable::GetGlobalOrWeakGlobalMask(); + constexpr uint64_t kIndirectRefKindMask = IndirectReferenceTable::GetIndirectRefKindMask(); + // TODO: Add `testq()` with `imm32` to assembler to avoid using 64-bit pointer as 32-bit value. + __ testl(reg.AsX86_64().AsCpuRegister(), Immediate(kGlobalOrWeakGlobalMask)); + __ j(kNotZero, X86_64JNIMacroLabel::Cast(slow_path)->AsX86_64()); + __ andq(reg.AsX86_64().AsCpuRegister(), Immediate(~kIndirectRefKindMask)); + __ j(kZero, X86_64JNIMacroLabel::Cast(resume)->AsX86_64()); // Skip load for null. + __ movl(reg.AsX86_64().AsCpuRegister(), Address(reg.AsX86_64().AsCpuRegister(), /*disp=*/ 0)); +} + void X86_64JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { // TODO: not validating references } @@ -803,6 +652,13 @@ void X86_64JNIMacroAssembler::TestMarkBit(ManagedRegister mref, __ j(UnaryConditionToX86_64Condition(cond), X86_64JNIMacroLabel::Cast(label)->AsX86_64()); } +void X86_64JNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) { + CpuRegister scratch = GetScratchRegister(); + __ movq(scratch, Immediate(address)); + __ cmpb(Address(scratch, 0), Immediate(0)); + __ j(kNotZero, X86_64JNIMacroLabel::Cast(label)->AsX86_64()); +} + void X86_64JNIMacroAssembler::Bind(JNIMacroLabel* label) { CHECK(label != nullptr); __ Bind(X86_64JNIMacroLabel::Cast(label)->AsX86_64()); diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h index e080f0b3df..da0aef9869 100644 --- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h +++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h @@ -28,7 +28,7 @@ #include "utils/assembler.h" #include "utils/jni_macro_assembler.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { class X86_64JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86_64Assembler, @@ -60,32 +60,14 @@ class X86_64JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86_64Assemble // Store routines void Store(FrameOffset offs, ManagedRegister src, size_t size) override; void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override; - void StoreRef(FrameOffset dest, ManagedRegister src) override; void StoreRawPtr(FrameOffset dest, ManagedRegister src) override; - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override; - - void StoreStackOffsetToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs) override; - - void StoreStackPointerToThread(ThreadOffset64 thr_offs) override; - - void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override; + void StoreStackPointerToThread(ThreadOffset64 thr_offs, bool tag_sp) override; // Load routines void Load(ManagedRegister dest, FrameOffset src, size_t size) override; void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override; - void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) override; - - void LoadRef(ManagedRegister dest, FrameOffset src) override; - - void LoadRef(ManagedRegister dest, - ManagedRegister base, - MemberOffset offs, - bool unpoison_reference) override; - - void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) override; - void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) override; // Copying routines 
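The `DecodeJNITransitionOrLocalJObject` implementation emitted just above (testl, j, andq, movl) can be read as the C++ sketch below. It is only an illustration: `DecodeJObjectSketch` and `SlowPathDecode` are hypothetical names, and the assumptions carried over from the diff are that the masks come from `IndirectReferenceTable` and that the decoded slot holds a 32-bit reference.

    #include <cstdint>

    // Placeholder for the out-of-line handling reached via the `slow_path` label;
    // the real slow path decodes the (weak) global reference through the runtime.
    inline uint32_t SlowPathDecode(uintptr_t /*ref*/) { return 0u; }

    // Sketch of the emitted logic: bail out for (weak) global references, strip the
    // IndirectRefKind bits for transition/local references, skip the load for null.
    inline uint32_t DecodeJObjectSketch(uintptr_t ref,
                                        uint64_t global_or_weak_global_mask,
                                        uint64_t kind_mask) {
      if ((ref & global_or_weak_global_mask) != 0u) {
        return SlowPathDecode(ref);                     // j(kNotZero, slow_path)
      }
      ref &= ~kind_mask;                                // andq(reg, ~kIndirectRefKindMask)
      if (ref == 0u) {
        return 0u;                                      // j(kZero, resume): skip load for null
      }
      return *reinterpret_cast<const uint32_t*>(ref);   // movl(reg, Address(reg, /*disp=*/ 0))
    }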
@@ -95,52 +77,7 @@ class X86_64JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86_64Assemble void Move(ManagedRegister dest, ManagedRegister src, size_t size) override; - void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) override; - - void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch) - override; - - void CopyRef(FrameOffset dest, FrameOffset src) override; - void CopyRef(FrameOffset dest, - ManagedRegister base, - MemberOffset offs, - bool unpoison_reference) override; - - void Copy(FrameOffset dest, FrameOffset src, size_t size) override; - - void Copy(FrameOffset dest, - ManagedRegister src_base, - Offset src_offset, - ManagedRegister scratch, - size_t size) override; - - void Copy(ManagedRegister dest_base, - Offset dest_offset, - FrameOffset src, - ManagedRegister scratch, - size_t size) override; - - void Copy(FrameOffset dest, - FrameOffset src_base, - Offset src_offset, - ManagedRegister scratch, - size_t size) override; - - void Copy(ManagedRegister dest, - Offset dest_offset, - ManagedRegister src, - Offset src_offset, - ManagedRegister scratch, - size_t size) override; - - void Copy(FrameOffset dest, - Offset dest_offset, - FrameOffset src, - Offset src_offset, - ManagedRegister scratch, - size_t size) override; - - void MemoryBarrier(ManagedRegister) override; + void Move(ManagedRegister dest, size_t value) override; // Sign extension void SignExtend(ManagedRegister mreg, size_t size) override; @@ -152,20 +89,10 @@ class X86_64JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86_64Assemble void GetCurrentThread(ManagedRegister dest) override; void GetCurrentThread(FrameOffset dest_offset) override; - // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value), - // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly - // stale reference that can be used to avoid loading the spilled value to - // see if the value is null. - void CreateJObject(ManagedRegister out_reg, - FrameOffset spilled_reference_offset, - ManagedRegister in_reg, - bool null_allowed) override; - - // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value), - // or to be null if the value is null and `null_allowed`. - void CreateJObject(FrameOffset out_off, - FrameOffset spilled_reference_offset, - bool null_allowed) override; + // Decode JNI transition or local `jobject`. For (weak) global `jobject`, jump to slow path. + void DecodeJNITransitionOrLocalJObject(ManagedRegister reg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) override; // Heap::VerifyObject on src. In some cases (such as a reference to this) we // know that src may not be null. @@ -209,10 +136,29 @@ class X86_64JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86_64Assemble void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override; // Emit a conditional jump to the label by applying a unary condition test to object's mark bit. void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override; + // Emit a conditional jump to label if the loaded value from specified locations is not zero. + void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override; // Code at this offset will serve as the target for the Jump call. 
void Bind(JNIMacroLabel* label) override; private: + void Copy(FrameOffset dest, FrameOffset src, size_t size); + + // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value), + // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly + // stale reference that can be used to avoid loading the spilled value to + // see if the value is null. + void CreateJObject(ManagedRegister out_reg, + FrameOffset spilled_reference_offset, + ManagedRegister in_reg, + bool null_allowed); + + // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value), + // or to be null if the value is null and `null_allowed`. + void CreateJObject(FrameOffset out_off, + FrameOffset spilled_reference_offset, + bool null_allowed); + DISALLOW_COPY_AND_ASSIGN(X86_64JNIMacroAssembler); }; diff --git a/compiler/utils/x86_64/managed_register_x86_64.cc b/compiler/utils/x86_64/managed_register_x86_64.cc index c0eec9d86c..75ff8aaf1d 100644 --- a/compiler/utils/x86_64/managed_register_x86_64.cc +++ b/compiler/utils/x86_64/managed_register_x86_64.cc @@ -18,7 +18,7 @@ #include "base/globals.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { // Define register pairs. diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h index 62c0e373a7..7a1be0bd8f 100644 --- a/compiler/utils/x86_64/managed_register_x86_64.h +++ b/compiler/utils/x86_64/managed_register_x86_64.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_UTILS_X86_64_MANAGED_REGISTER_X86_64_H_ #define ART_COMPILER_UTILS_X86_64_MANAGED_REGISTER_X86_64_H_ +#include "base/macros.h" #include "constants_x86_64.h" #include "utils/managed_register.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { // Values for register pairs. diff --git a/compiler/utils/x86_64/managed_register_x86_64_test.cc b/compiler/utils/x86_64/managed_register_x86_64_test.cc index 46a405ffaf..048268bf9b 100644 --- a/compiler/utils/x86_64/managed_register_x86_64_test.cc +++ b/compiler/utils/x86_64/managed_register_x86_64_test.cc @@ -16,9 +16,10 @@ #include "managed_register_x86_64.h" #include "base/globals.h" +#include "base/macros.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { TEST(X86_64ManagedRegister, NoRegister) {
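One behavioral addition in the hunks above is the `tag_sp` flag on `StoreStackPointerToThread`: when it is set, the x86-64 code copies RSP to a scratch register and ORs in 0x2 before the store, so the value published to the thread-local slot carries a tag in bit 1 (the reader of that tag is not shown in this diff). A minimal sketch of the published value; `PublishedSpValue` is a hypothetical name used only for illustration:

    #include <cstdint>

    // Value that StoreStackPointerToThread writes for a given stack pointer.
    // The 0x2 constant mirrors the orq immediate in the implementation above.
    inline uintptr_t PublishedSpValue(uintptr_t sp, bool tag_sp) {
      return tag_sp ? (sp | 0x2u) : sp;
    }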
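Another addition, `TestByteAndJumpIfNotZero`, branches on a byte-sized flag at a fixed address: the x86 version compares against an absolute address directly, while the x86-64 version first materializes the 64-bit address in a scratch register. The condition both variants emit reduces to the check below; `ByteIsNotZero` is a hypothetical name, and the `volatile` qualifier is an assumption that the flag may be updated at runtime:

    #include <cstdint>

    // Take the branch (modeled here as returning true) when the byte at `address`
    // is non-zero.
    inline bool ByteIsNotZero(uintptr_t address) {
      return *reinterpret_cast<const volatile uint8_t*>(address) != 0u;
    }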